00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00032 #include "dnxError.h"
00033 #include "dnxDebug.h"
00034 #include "dnxTransport.h"
00035 #include "dnxClntProt.h"
00036 #include "dnxCfgParser.h"
00037 #include "dnxWLM.h"
00038 #include "dnxPlugin.h"
00039 #include "dnxLogging.h"
00040
00041 #include <sys/types.h>
00042 #include <sys/stat.h>
00043 #include <sys/wait.h>
00044 #include <sys/file.h>
00045 #include <stdio.h>
00046 #include <stdlib.h>
00047 #include <stdint.h>
00048 #include <stdarg.h>
00049 #include <unistd.h>
00050 #include <string.h>
00051 #include <signal.h>
00052 #include <pthread.h>
00053 #include <fcntl.h>
00054 #include <errno.h>
00055 #include <pwd.h>
00056 #include <grp.h>
00057
/* Build identification: taken from the generated config.h when the build
 * provides one, otherwise replaced by placeholder strings. */
#if HAVE_CONFIG_H
# include "config.h"
#else
# define VERSION           "<unknown>"
# define PACKAGE_BUGREPORT "<unknown>"
#endif

/* Long command line options are only compiled in when getopt_long exists. */
#if HAVE_GETOPT_LONG
#define _GNU_SOURCE
#include <getopt.h>
#endif

/* Installation locations and daemon identity; each one may be overridden
 * from the compiler command line. */
#if !defined(SYSCONFDIR)
# define SYSCONFDIR "/etc"
#endif

#if !defined(SYSLOGDIR)
# define SYSLOGDIR "/var/log"
#endif

#if !defined(SYSRUNPATH)
# define SYSRUNPATH "/var/run/dnx"
#endif

#if !defined(LIBEXECDIR)
# define LIBEXECDIR "/usr/lib"
#endif

#if !defined(DNXUSER)
# define DNXUSER "nagios"
#endif

#if !defined(DNXGROUP)
# define DNXGROUP "nagios"
#endif

#if !defined(COMPILE_FLAGS)
# define COMPILE_FLAGS "<unknown>"
#endif

/* Default values derived from the locations above. */
#define DNX_DEFAULT_NODE_CONFIG_FILE   SYSCONFDIR "/dnxClient.cfg"
#define DNX_DEFAULT_LOG                SYSLOGDIR "/dnxcld.log"
#define DNX_DEFAULT_DBGLOG             SYSLOGDIR "/dnxcld.debug.log"
#define DNX_DEFAULT_PLUGIN_PATH        LIBEXECDIR
#define DNX_DEFAULT_RUN_PATH           SYSRUNPATH
#define DNX_DEFAULT_USER               DNXUSER
#define DNX_DEFAULT_GROUP              DNXGROUP

/* Number of elements in a true array (not valid for pointers). */
#define elemcount(x) (sizeof(x) / sizeof((x)[0]))
00108
00109
00110 typedef struct DnxCfgData
00111 {
00112 char * agentUrl;
00113 char * pluginPath;
00114 char * user;
00115 char * group;
00116 char * runPath;
00117 char * logFilePath;
00118 char * debugFilePath;
00119 unsigned debugLevel;
00120 DnxWlmCfgData wlm;
00121 } DnxCfgData;
00122
00123
00124 static DnxCfgData s_cfg;
00125 static DnxCfgParser * s_parser;
00126 static DnxWlm * s_wlm = 0;
00127 static DnxChannel * s_agent;
00128 static char * s_progname;
00129 static char * s_cfgfile;
00130 static char * s_cmdover = 0;
00131 static int s_dbgflag = 0;
00132 static int s_shutdown = 0;
00133 static int s_reconfig = 0;
00134 static int s_debugsig = 0;
00135 static int s_lockfd = -1;
00136
00137
00138
00139
00140
00149 static char * versionText(char * base)
00150 {
00151 char buf[1024];
00152 snprintf(buf, sizeof(buf) - 1,
00153 "\n"
00154 " %s Version " VERSION ", Built " __DATE__ " at " __TIME__ ".\n"
00155 " Distributed Nagios eXecutor (DNX) Client Daemon.\n"
00156 " Please report bugs to <" PACKAGE_BUGREPORT ">.\n"
00157 "\n"
00158 " Default configuration:\n"
00159 " Default config file: " DNX_DEFAULT_NODE_CONFIG_FILE "\n"
00160 " Default log file: " DNX_DEFAULT_LOG "\n"
00161 " Default debug log file: " DNX_DEFAULT_DBGLOG "\n"
00162 " Default system run path: " DNX_DEFAULT_RUN_PATH "\n"
00163 " Default daemon user: " DNX_DEFAULT_USER "\n"
00164 " Default daemon group: " DNX_DEFAULT_GROUP "\n"
00165
00166 #if DEBUG_HEAP
00167 " Debug heap is ENABLED.\n"
00168 #endif
00169 #if DEBUG_LOCKS
00170 " Debug locks are ENABLED.\n"
00171 #endif
00172 , base
00173 );
00174 return xstrdup(buf);
00175 }
00176
00177
00178
/** Print the version banner followed by a newline.
 *
 * @param[in] fp - the stream to print on.
 * @param[in] base - the program base name passed on to versionText.
 *
 * Nothing is printed when versionText yields no text.
 */
static void version(FILE * fp, char * base)
{
   char * banner = versionText(base);

   if (banner == 0)
      return;

   fprintf(fp, "%s\n", banner);
   xfree(banner);
}
00194
00195
00196
/** Print the version banner and the option summary on stderr, then exit.
 *
 * @param[in] base - the program base name shown in the usage line.
 *
 * This function does not return; it ends the process with exit(-1).
 */
static void usage(char * base)
{
   // The long-option column is only shown when long options are compiled in.
#if !HAVE_GETOPT_LONG
# define OL_CFGFILE
# define OL_LOGFILE
# define OL_DBGFILE
# define OL_DBGLEVEL
# define OL_DEBUG
# define OL_RUNPATH
# define OL_USER
# define OL_GROUP
# define OL_VERSION
# define OL_HELP
#else
# define OL_CFGFILE ", --cfgfile "
# define OL_LOGFILE ", --logfile "
# define OL_DBGFILE ", --dbgfile "
# define OL_DBGLEVEL ", --dbglevel"
# define OL_DEBUG ", --debug "
# define OL_RUNPATH ", --runpath "
# define OL_USER ", --user "
# define OL_GROUP ", --group "
# define OL_VERSION ", --version "
# define OL_HELP ", --help "
#endif

   FILE * const out = stderr;

   version(out, base);
   fprintf(out,
      " Usage: %s [options]\n"
      " Where [options] are:\n"
      " -c" OL_CFGFILE " <file> specify the file and path of the config file.\n"
      " -l" OL_LOGFILE " <file> specify the file and path of the log file.\n"
      " -D" OL_DBGFILE " <file> specify the file and path of the debug log file.\n"
      " -g" OL_DBGLEVEL " <value> specify the level of debugging output.\n"
      " -d" OL_DEBUG " enable debug mode (will not become a daemon).\n"
      " -r" OL_RUNPATH " <path> specify the path of the lock/pid file.\n"
      " -U" OL_USER " <user> specify the DNX client user name or id.\n"
      " -G" OL_GROUP " <group> specify the DNX client group name or id.\n"
      " -v" OL_VERSION " display DNX client version and exit.\n"
      " -h" OL_HELP " display this help screen and exit.\n"
      "\n",
      base
   );
   exit(-1);
}
00247
00248
00249
00260 static int appendString(char ** spp, char * fmt, ... )
00261 {
00262 char buf[1024];
00263 char * newstr;
00264 size_t strsz;
00265 va_list ap;
00266
00267
00268 va_start(ap, fmt);
00269 vsnprintf(buf, sizeof buf, fmt, ap);
00270 va_end(ap);
00271
00272
00273 strsz = strlen(buf) + 1;
00274 if ((newstr = xrealloc(*spp, (*spp? strlen(*spp): 0) + strsz)) == 0) return DNX_ERR_MEMORY;
00275 if (*spp == 0)
00276 *newstr = 0;
00277
00278
00279 strcat(newstr, buf);
00280 *spp = newstr;
00281 return 0;
00282 }
00283
00284
00285
00293 static int getOptions(int argc, char ** argv)
00294 {
00295
00296 extern char * optarg;
00297 extern int opterr, optopt;
00298
00299 static char opts[] = "c:dr:g:l:D:U:G:vh";
00300
00301 #if HAVE_GETOPT_LONG
00302 static struct option longopts[] =
00303 {
00304 { "cfgfile", required_argument, 0, 'c' },
00305 { "logfile", required_argument, 0, 'l' },
00306 { "dbgfile", required_argument, 0, 'D' },
00307 { "dbglevel", required_argument, 0, 'g' },
00308 { "debug", no_argument, 0, 'd' },
00309 { "runpath", required_argument, 0, 'r' },
00310 { "version", no_argument, 0, 'v' },
00311 { "user", required_argument, 0, 'U' },
00312 { "group", required_argument, 0, 'G' },
00313 { "help", no_argument, 0, 'h' },
00314 { 0, 0, 0, 0 },
00315 };
00316 #endif
00317
00318 int ch;
00319 char * cp;
00320 char * logfile = 0;
00321 char * dbgfile = 0;
00322 char * dbglvl = 0;
00323 char * user = 0;
00324 char * group = 0;
00325 char * runpath = 0;
00326 size_t rplen;
00327
00328
00329 s_progname = (char *)((cp = strrchr(argv[0], '/')) != 0 ? (cp + 1) : argv[0]);
00330
00331 opterr = 0;
00332
00333 #if HAVE_GETOPT_LONG
00334 while ((ch = getopt_long(argc, argv, opts, longopts, 0)) != -1)
00335 #else
00336 while ((ch = getopt(argc, argv, opts)) != -1)
00337 #endif
00338 {
00339 switch (ch)
00340 {
00341 case 'c': s_cfgfile = optarg; break;
00342 case 'd': s_dbgflag = 1; break;
00343 case 'r': runpath = optarg; break;
00344 case 'U': user = optarg; break;
00345 case 'G': group = optarg; break;
00346 case 'g': dbglvl = optarg; break;
00347 case 'l': logfile = optarg; break;
00348 case 'D': dbgfile = optarg; break;
00349 case 'v': version(stdout, s_progname); exit(0);
00350 case 'h':
00351 default : usage(s_progname);
00352 }
00353 }
00354
00355 if (!s_cfgfile)
00356 s_cfgfile = DNX_DEFAULT_NODE_CONFIG_FILE;
00357
00358 if (runpath && (rplen = strlen(runpath)) != 0 && runpath[rplen - 1] == '/')
00359 runpath[rplen - 1] = 0;
00360
00361 if (s_dbgflag)
00362 appendString(&s_cmdover, "logFile=STDOUT\ndebugFile=STDOUT\n");
00363
00364 if (logfile)
00365 appendString(&s_cmdover, "logFile=%s\n", logfile);
00366
00367 if (dbgfile)
00368 appendString(&s_cmdover, "debugFile=%s\n", dbgfile);
00369
00370 if (dbglvl)
00371 appendString(&s_cmdover, "debugLevel=%s\n", dbglvl);
00372
00373 if (user)
00374 appendString(&s_cmdover, "user=%s\n", user);
00375
00376 if (group)
00377 appendString(&s_cmdover, "group=%s\n", group);
00378
00379 if (runpath)
00380 appendString(&s_cmdover, "runPath=%s\n", runpath);
00381
00382 return 0;
00383 }
00384
00385
00386
00398 static int validateCfg(DnxCfgDict * dict, void ** vptrs, void * passthru)
00399 {
00400 int ret = DNX_ERR_INVALID;
00401 DnxCfgData cfg;
00402
00403
00404
00405 cfg.wlm.dispatcherUrl = (char *) vptrs[ 0];
00406 cfg.wlm.collectorUrl = (char *) vptrs[ 1];
00407 cfg.wlm.poolInitial = (unsigned)(intptr_t)vptrs[ 2];
00408 cfg.wlm.poolMin = (unsigned)(intptr_t)vptrs[ 3];
00409 cfg.wlm.poolMax = (unsigned)(intptr_t)vptrs[ 4];
00410 cfg.wlm.poolGrow = (unsigned)(intptr_t)vptrs[ 5];
00411 cfg.wlm.pollInterval = (unsigned)(intptr_t)vptrs[ 6];
00412 cfg.wlm.shutdownGrace = (unsigned)(intptr_t)vptrs[ 7];
00413 cfg.wlm.showNodeAddr = (unsigned)(intptr_t)vptrs[ 8];
00414 cfg.wlm.reqTimeout = (unsigned)(intptr_t)vptrs[ 9];
00415 cfg.wlm.maxRetries = (unsigned)(intptr_t)vptrs[10];
00416 cfg.wlm.ttlBackoff = (unsigned)(intptr_t)vptrs[11];
00417 cfg.wlm.maxResults = (unsigned)(intptr_t)vptrs[12];
00418 cfg.agentUrl = (char *) vptrs[13];
00419 cfg.pluginPath = (char *) vptrs[14];
00420 cfg.user = (char *) vptrs[15];
00421 cfg.group = (char *) vptrs[16];
00422 cfg.runPath = (char *) vptrs[17];
00423 cfg.logFilePath = (char *) vptrs[18];
00424 cfg.debugFilePath = (char *) vptrs[19];
00425 cfg.debugLevel = (unsigned)(intptr_t)vptrs[20];
00426
00427 if (!cfg.wlm.dispatcherUrl)
00428 dnxLog("config: Missing channelDispatcher parameter.");
00429 else if (!cfg.wlm.collectorUrl)
00430 dnxLog("config: Missing channelCollector parameter.");
00431 if (cfg.wlm.poolInitial < 1 || cfg.wlm.poolInitial > cfg.wlm.poolMax)
00432 dnxLog("config: Invalid poolInitial parameter.");
00433 else if (cfg.wlm.poolMin < 1 || cfg.wlm.poolMin > cfg.wlm.poolMax)
00434 dnxLog("config: Invalid poolMin parameter.");
00435 else if (cfg.wlm.poolGrow < 1 || cfg.wlm.poolGrow >= cfg.wlm.poolMax)
00436 dnxLog("config: Invalid poolGrow parameter.");
00437 else if (cfg.wlm.pollInterval < 1)
00438 dnxLog("config: Invalid wlmPollInterval parameter.");
00439 else if (cfg.wlm.shutdownGrace < 0)
00440 dnxLog("config: Invalid wlmShutdownGracePeriod parameter.");
00441 else if (cfg.wlm.reqTimeout < 1 || cfg.wlm.reqTimeout <= cfg.wlm.ttlBackoff)
00442 dnxLog("config: Invalid threadRequestTimeout parameter.");
00443 else if (cfg.wlm.ttlBackoff >= cfg.wlm.reqTimeout)
00444 dnxLog("config: Invalid threadTtlBackoff parameter.");
00445 else if (cfg.wlm.maxResults < 1024)
00446 dnxLog("config: Invalid maxResultBuffer parameter.");
00447 else
00448 ret = 0;
00449
00450 return ret;
00451 }
00452
00453
00454
00456 static void releaseConfig(void)
00457 {
00458 dnxCfgParserDestroy(s_parser);
00459 xfree(s_cmdover);
00460 s_cmdover = 0;
00461 }
00462
00463
00464
00470 static int initConfig(char * cfgfile)
00471 {
00472 DnxCfgDict dict[] =
00473 {
00474 { "channelDispatcher", DNX_CFG_URL, &s_cfg.wlm.dispatcherUrl },
00475 { "channelCollector", DNX_CFG_URL, &s_cfg.wlm.collectorUrl },
00476 { "poolInitial", DNX_CFG_UNSIGNED, &s_cfg.wlm.poolInitial },
00477 { "poolMin", DNX_CFG_UNSIGNED, &s_cfg.wlm.poolMin },
00478 { "poolMax", DNX_CFG_UNSIGNED, &s_cfg.wlm.poolMax },
00479 { "poolGrow", DNX_CFG_UNSIGNED, &s_cfg.wlm.poolGrow },
00480 { "wlmPollInterval", DNX_CFG_UNSIGNED, &s_cfg.wlm.pollInterval },
00481 { "wlmShutdownGracePeriod", DNX_CFG_UNSIGNED, &s_cfg.wlm.shutdownGrace },
00482 { "showNodeAddr", DNX_CFG_BOOL, &s_cfg.wlm.showNodeAddr },
00483 { "threadRequestTimeout", DNX_CFG_UNSIGNED, &s_cfg.wlm.reqTimeout },
00484 { "threadMaxRetries", DNX_CFG_UNSIGNED, &s_cfg.wlm.maxRetries },
00485 { "threadTtlBackoff", DNX_CFG_UNSIGNED, &s_cfg.wlm.ttlBackoff },
00486 { "maxResultBuffer", DNX_CFG_UNSIGNED, &s_cfg.wlm.maxResults },
00487 { "channelAgent", DNX_CFG_URL, &s_cfg.agentUrl },
00488 { "pluginPath", DNX_CFG_FSPATH, &s_cfg.pluginPath },
00489 { "user", DNX_CFG_STRING, &s_cfg.user },
00490 { "group", DNX_CFG_STRING, &s_cfg.group },
00491 { "runPath", DNX_CFG_FSPATH, &s_cfg.runPath },
00492 { "logFile", DNX_CFG_FSPATH, &s_cfg.logFilePath },
00493 { "debugFile", DNX_CFG_FSPATH, &s_cfg.debugFilePath },
00494 { "debugLevel", DNX_CFG_UNSIGNED, &s_cfg.debugLevel },
00495 { 0 },
00496 };
00497 char cfgdefs[] =
00498 "channelDispatcher = udp://0:12480\n"
00499 "channelAgent = udp://0:12482\n"
00500 "poolInitial = 20\n"
00501 "poolMin = 20\n"
00502 "poolMax = 100\n"
00503 "poolGrow = 10\n"
00504 "wlmPollInterval = 2\n"
00505 "wlmShutdownGracePeriod = 35\n"
00506 "showNodeAddr = Yes\n"
00507 "threadRequestTimeout = 5\n"
00508 "threadMaxRetries = 12\n"
00509 "threadTtlBackoff = 1\n"
00510 "maxResultBuffer = 1024\n"
00511 "pluginPath = " DNX_DEFAULT_PLUGIN_PATH "\n"
00512 "user = " DNX_DEFAULT_USER "\n"
00513 "group = " DNX_DEFAULT_GROUP "\n"
00514 "runPath = " DNX_DEFAULT_RUN_PATH "\n"
00515 "logFile = " DNX_DEFAULT_LOG "\n"
00516 "debugFile = " DNX_DEFAULT_DBGLOG "\n";
00517
00518 int ret;
00519
00520
00521 if ((ret = dnxCfgParserCreate(cfgdefs, s_cfgfile, s_cmdover, dict,
00522 validateCfg, &s_parser)) != 0)
00523 return ret;
00524
00525
00526 if ((ret = dnxCfgParserParse(s_parser, 0)) != 0)
00527 releaseConfig();
00528
00529 return ret;
00530 }
00531
00532
00533
00538 static int initClientComm(void)
00539 {
00540 int ret;
00541
00542 s_agent = 0;
00543
00544
00545 if ((ret = dnxChanMapInit(0)) != DNX_OK)
00546 {
00547 dnxLog("Failed to initialize channel map: %s.", dnxErrorString(ret));
00548 return ret;
00549 }
00550
00551
00552
00553 if ((ret = dnxChanMapAdd("Agent", s_cfg.agentUrl)) != DNX_OK)
00554 {
00555 dnxLog("Failed to initialize AGENT channel: %s.", dnxErrorString(ret));
00556 dnxChanMapRelease();
00557 return ret;
00558 }
00559
00560
00561 if ((ret = dnxConnect("Agent", DNX_MODE_PASSIVE, &s_agent)) != DNX_OK)
00562 {
00563 dnxLog("Failed to open AGENT channel: %s.", dnxErrorString(ret));
00564 dnxChanMapDelete("Agent");
00565 dnxChanMapRelease();
00566 return ret;
00567 }
00568 return 0;
00569 }
00570
00571
00572
00574 static void releaseClientComm(void)
00575 {
00576 dnxDisconnect(s_agent);
00577 dnxChanMapDelete("Agent");
00578 dnxChanMapRelease();
00579 }
00580
00581
00582
00587 static void sighandler(int sig)
00588 {
00589 switch(sig)
00590 {
00591 case SIGHUP: s_reconfig = 1; break;
00592 case SIGUSR1: s_debugsig = 1; break;
00593 default: s_shutdown = 1;
00594 break;
00595 }
00596 }
00597
00598
00599
00606 static int createPidFile(char * base)
00607 {
00608 char lockFile[1024];
00609 char szPid[32];
00610
00611
00612 sprintf(lockFile, "%s/%s.pid", s_cfg.runPath, base);
00613
00614
00615 if ((s_lockfd = open(lockFile, O_RDWR | O_CREAT, 0644)) < 0)
00616 {
00617 dnxLog("Unable to create lock file, %s: %s.", lockFile, strerror(errno));
00618 return (-1);
00619 }
00620
00621
00622 if (flock(s_lockfd, LOCK_EX | LOCK_NB) != 0)
00623 {
00624 close(s_lockfd);
00625 dnxLog("Lock file already in-use: %s: %s.", lockFile, strerror(errno));
00626 return (-1);
00627 }
00628
00629
00630 sprintf(szPid, "%d\n", getpid());
00631
00632
00633 if (write(s_lockfd, szPid, strlen(szPid)) != strlen(szPid))
00634 {
00635 close(s_lockfd);
00636 dnxLog("Failed to write pid to lock file, %s: %s.",
00637 lockFile, strerror(errno));
00638 return (-1);
00639 }
00640 return 0;
00641 }
00642
00643
00644
00649 static void removePidFile(char * base)
00650 {
00651 char lockFile[1024];
00652
00653
00654 sprintf(lockFile, "%s/%s.pid", s_cfg.runPath, base);
00655
00656
00657
00658 unlink(lockFile);
00659
00660
00661 if (s_lockfd >= 0) close(s_lockfd);
00662 }
00663
00664
00665
00672 static int daemonize(char * base)
00673 {
00674 int pid, fd;
00675
00676
00677 if ((pid = fork()) < 0)
00678 {
00679 dnxLog("Failed 1st fork: %s.", strerror(errno));
00680 return -1;
00681 }
00682 else if (pid != 0)
00683 exit(0);
00684
00685
00686 setsid();
00687
00688
00689 if ((pid = fork()) < 0)
00690 {
00691 dnxLog("Failed 2nd fork: %s.", strerror(errno));
00692 return -1;
00693 }
00694 else if (pid != 0)
00695 exit(0);
00696
00697
00698 chdir("/");
00699
00700
00701 chdir(s_cfg.runPath);
00702 dnxLog("Changed working directory to %s", s_cfg.runPath);
00703
00704 umask(0);
00705
00706
00707 fd = open("/dev/null", O_RDWR);
00708 dup2(fd, 0);
00709 dup2(fd, 1);
00710 dup2(fd, 2);
00711
00712 return 0;
00713 }
00714
00715
00716
00721 static int dropPrivileges(void)
00722 {
00723
00724 if (getuid() == 0)
00725 {
00726 struct passwd * pwd;
00727 struct group * grp;
00728 uid_t uid;
00729 gid_t gid;
00730
00731 dnxLog("Running as root; attempting to drop privileges...");
00732
00733 if ((pwd = getpwnam(s_cfg.user)) != 0)
00734 uid = pwd->pw_uid;
00735 else
00736 {
00737 char * ep;
00738 uid = (uid_t)strtoul(s_cfg.user, &ep, 10);
00739 if (s_cfg.user + strlen(s_cfg.user) > ep)
00740 {
00741 dnxLog("Invalid user name or id specified: %s.", s_cfg.user);
00742 return -1;
00743 }
00744 }
00745
00746 if ((grp = getgrnam(s_cfg.group)) != 0)
00747 gid = grp->gr_gid;
00748 else
00749 {
00750 char * ep;
00751 gid = (gid_t)strtoul(s_cfg.group, &ep, 10);
00752 if (s_cfg.group + strlen(s_cfg.group) > ep)
00753 {
00754 dnxLog("Invalid group name or id specified: %s.", s_cfg.group);
00755 return -1;
00756 }
00757 }
00758
00759
00760 if (uid != 0)
00761 {
00762 int ret;
00763 if ((ret = setgid(gid)) == -1 || (ret = setuid(uid)) == -1)
00764 {
00765 dnxLog("Failed to drop privileges: %s. Terminating.", strerror(errno));
00766 return -1;
00767 }
00768
00769 grp = getgrgid(getgid());
00770 pwd = getpwuid(getuid());
00771
00772 assert(grp && pwd);
00773
00774 dnxLog("Privileges dropped to %s:%s.", pwd->pw_name, grp->gr_name);
00775 }
00776 else
00777 dnxLog("Root user requested; oh well...");
00778 }
00779 return 0;
00780 }
00781
00782
00783
00792 static void logGblConfigChanges(DnxCfgData * ocp, DnxCfgData * ncp)
00793 {
00794 if (strcmp(ocp->agentUrl, ncp->agentUrl) != 0)
00795 dnxLog("Config parameter 'channelAgent' changed from %s to %s. "
00796 "NOTE: Changing the agent URL requires a restart.",
00797 ocp? ocp->agentUrl: "<unknown>", ncp->agentUrl);
00798
00799 if (strcmp(ocp->logFilePath, ncp->logFilePath) != 0)
00800 dnxLog("Config parameter 'logFile' changed from %s to %s. "
00801 "NOTE: Changing the log file path requires a restart.",
00802 ocp? ocp->logFilePath: "<unknown>", ncp->logFilePath);
00803
00804 if (strcmp(ocp->debugFilePath, ncp->debugFilePath) != 0)
00805 dnxLog("Config parameter 'debugFile' changed from %s to %s. "
00806 "NOTE: Changing the debug log file path requires a restart.",
00807 ocp? ocp->debugFilePath: "<unknown>", ncp->debugFilePath);
00808
00809 if (strcmp(ocp->pluginPath, ncp->pluginPath) != 0)
00810 dnxLog("Config parameter 'pluginPath' changed from %s to %s.",
00811 ocp? ocp->pluginPath: "<unknown>", ncp->pluginPath);
00812
00813 if (strcmp(ocp->user, ncp->user) != 0)
00814 dnxLog("Config parameter 'user' changed from %s to %s. "
00815 "NOTE: Changing the dnx user requires a restart.",
00816 ocp? ocp->user: "<unknown>", ncp->user);
00817
00818 if (strcmp(ocp->group, ncp->group) != 0)
00819 dnxLog("Config parameter 'group' changed from %s to %s. "
00820 "NOTE: Changing the dnx group requires a restart.",
00821 ocp? ocp->group: "<unknown>", ncp->group);
00822
00823 if (strcmp(ocp->runPath, ncp->runPath) != 0)
00824 dnxLog("Config parameter 'runPath' changed from %s to %s. "
00825 "NOTE: Changing the dnx pid/lock file directory requires a restart.",
00826 ocp? ocp->runPath: "<unknown>", ncp->runPath);
00827
00828 if (ocp->debugLevel != ncp->debugLevel)
00829 dnxLog("Config parameter 'debugLevel' changed from %u to %u.",
00830 ocp? ocp->debugLevel: 0, ncp->debugLevel);
00831 }
00832
00833
00834
/** Build the reply text for the HELP management command.
 *
 * @return The result of xstrdup on the fixed help text; the caller in this
 * file treats 0 as failure and releases the reply with xfree.
 */
static char * buildHelpReply(void)
{
   static char * helptext =
      "DNX Client Management Commands:\n"
      " SHUTDOWN\n"
      " RECONFIGURE\n"
      " DEBUGTOGGLE\n"
      " RESETSTATS\n"
      " GETSTATS stat-list\n"
      " stat-list is a comma-delimited list of stat names:\n"
      " jobs_handled - number of jobs completed successful or otherwise\n"
      " jobs_ok - number of successful jobs\n"
      " jobs_failed - number of unsuccessful jobs\n"
      " th_created - number of threads created\n"
      " th_destroyed - number of threads destroyed\n"
      " th_exist - number of threads currently in existence\n"
      " th_active - number of threads currently active\n"
      " req_sent - number of requests sent to DNX server\n"
      " jobs_rcvd - number of jobs received from DNX server\n"
      " min_exec_tm - minimum job execution time\n"
      " avg_exec_tm - average job execution time\n"
      " max_exec_tm - maximum job execution time\n"
      " avg_th_exist - average threads in existence\n"
      " avg_th_active - average threads processing jobs\n"
      " thread_tm - total thread life time\n"
      " job_tm - total job processing time\n"
      " Note: Stats are returned in the order they are requested.\n"
      " Unrecognized requested stats are returned as '?'.\n"
      " GETCONFIG\n"
      " GETVERSION\n"
      " HELP";

   char * reply = xstrdup(helptext);
   return reply;
}
00872
00873
00874
00881 static char * buildMgmtStatsReply(char * req)
00882 {
00883 char * rsp = 0;
00884 unsigned jobs_handled;
00885 DnxWlmStats ws;
00886
00887 struct { char * str; unsigned * stat; } rs[] =
00888 {
00889 { "jobs_handled", &jobs_handled },
00890 { "jobs_ok", &ws.jobs_succeeded },
00891 { "jobs_failed", &ws.jobs_failed },
00892 { "th_created", &ws.threads_created },
00893 { "th_destroyed", &ws.threads_destroyed },
00894 { "th_exist", &ws.total_threads },
00895 { "th_active", &ws.active_threads },
00896 { "req_sent", &ws.requests_sent },
00897 { "jobs_rcvd", &ws.jobs_received },
00898 { "min_exec_tm", &ws.min_exec_time },
00899 { "avg_exec_tm", &ws.avg_exec_time },
00900 { "max_exec_tm", &ws.max_exec_time },
00901 { "avg_th_exist", &ws.avg_total_threads },
00902 { "avg_th_active", &ws.avg_active_threads },
00903 { "thread_tm", &ws.thread_time },
00904 { "job_tm", &ws.job_time },
00905 };
00906
00907 assert(req);
00908
00909
00910 dnxWlmGetStats(s_wlm, &ws);
00911 jobs_handled = ws.jobs_succeeded + ws.jobs_failed;
00912
00913
00914 while (isspace(*req)) req++;
00915
00916 while (*req)
00917 {
00918 char * ep, * np;
00919 unsigned i;
00920
00921
00922 if ((np = strchr(req, ',')) == 0)
00923 np = req + strlen(req);
00924
00925
00926 ep = np;
00927 while (ep > req && isspace(ep[-1])) ep--;
00928
00929
00930 for (i = 0; i < elemcount(rs); i++)
00931 if (memcmp(req, rs[i].str, ep - req) == 0)
00932 {
00933 if (appendString(&rsp, "%u,", *rs[i].stat) != 0)
00934 return xfree(rsp), (char *)0;
00935 break;
00936 }
00937
00938
00939 if (i == elemcount(rs) && appendString(&rsp, "?,") != 0)
00940 return xfree(rsp), (char *)0;
00941
00942
00943 if (*(req = np)) req++;
00944
00945
00946 while (isspace(*req)) req++;
00947 }
00948
00949
00950 if (rsp)
00951 {
00952 size_t len = strlen(rsp);
00953 if (len && rsp[len - 1] == ',') rsp[len - 1] = 0;
00954 }
00955 return rsp;
00956 }
00957
00958
00959
00964 static char * buildMgmtCfgReply(void)
00965 {
00966 char * buf;
00967 size_t bufsz = 0;
00968
00969 if (dnxCfgParserGetCfg(s_parser, 0, &bufsz) != 0)
00970 return 0;
00971
00972 if ((buf = (char *)xmalloc(bufsz)) != 0)
00973 if (dnxCfgParserGetCfg(s_parser, buf, &bufsz) != 0)
00974 xfree(buf), (buf = 0);
00975
00976 return buf;
00977 }
00978
00979
00980
00985 static void freeCfgData(DnxCfgData * cpy)
00986 {
00987 xfree(cpy->agentUrl);
00988 xfree(cpy->logFilePath);
00989 xfree(cpy->debugFilePath);
00990 xfree(cpy->pluginPath);
00991 xfree(cpy->user);
00992 xfree(cpy->group);
00993 xfree(cpy->runPath);
00994 xfree(cpy->wlm.dispatcherUrl);
00995 xfree(cpy->wlm.collectorUrl);
00996 xfree(cpy);
00997 cpy = 0;
00998 }
00999
01000
01001
01008 static DnxCfgData * copyCfgData(DnxCfgData * org)
01009 {
01010 DnxCfgData * cpy;
01011
01012
01013 if ((cpy = (DnxCfgData *)xmalloc(sizeof *cpy)) == 0)
01014 return 0;
01015
01016
01017 *cpy = *org;
01018
01019
01020 cpy->agentUrl = xstrdup(org->agentUrl);
01021 cpy->logFilePath = xstrdup(org->logFilePath);
01022 cpy->debugFilePath = xstrdup(org->debugFilePath);
01023 cpy->pluginPath = xstrdup(org->pluginPath);
01024 cpy->user = xstrdup(org->user);
01025 cpy->group = xstrdup(org->group);
01026 cpy->runPath = xstrdup(org->runPath);
01027 cpy->wlm.dispatcherUrl = xstrdup(org->wlm.dispatcherUrl);
01028 cpy->wlm.collectorUrl = xstrdup(org->wlm.collectorUrl);
01029
01030
01031 if (cpy->agentUrl == 0 || cpy->logFilePath == 0
01032 || cpy->debugFilePath == 0 || cpy->pluginPath == 0
01033 || cpy->user == 0 || cpy->group == 0 || cpy->runPath == 0
01034 || cpy->wlm.dispatcherUrl == 0 || cpy->wlm.collectorUrl == 0)
01035 freeCfgData(cpy), cpy = 0;
01036
01037 return cpy;
01038 }
01039
01040
01041
01046 static int processCommands(void)
01047 {
01048 int ret;
01049 DnxMgmtRequest Msg;
01050 Msg.action = 0;
01051 dnxLog("DNX Client Agent awaiting commands...");
01052
01053 while (1)
01054 {
01055
01056 memset(Msg.address,'\0',DNX_MAX_ADDRESS);
01057
01058
01059 if ((ret = dnxWaitForMgmtRequest(s_agent, &Msg, Msg.address, 1)) == DNX_OK)
01060 {
01061 DnxMgmtReply Rsp;
01062 char addrstr[DNX_MAX_ADDRSTR];
01063
01064 dnxDebug(3,"processCommand: Received MgmtRequest from %s",
01065 dnxNtop(Msg.address, addrstr, sizeof addrstr));
01066
01067
01068 Rsp.xid = Msg.xid;
01069 Rsp.status = DNX_REQ_ACK;
01070 Rsp.reply = 0;
01071
01072
01073 if (!strcmp(Msg.action, "SHUTDOWN"))
01074 {
01075 Rsp.reply = xstrdup("OK");
01076 s_shutdown = 1;
01077 }
01078 else if (!strcmp(Msg.action, "RECONFIGURE"))
01079 {
01080 s_reconfig = 1;
01081 Rsp.reply = xstrdup("OK");
01082 }
01083 else if (!strcmp(Msg.action, "DEBUGTOGGLE"))
01084 {
01085 s_debugsig = 1;
01086 Rsp.reply = xstrdup("OK");
01087 }
01088 else if (!strcmp(Msg.action, "RESETSTATS"))
01089 {
01090 dnxWlmResetStats(s_wlm);
01091
01092 Rsp.reply = xstrdup("OK");
01093 }
01094 if (!memcmp(Msg.action, "GETSTATS ", 9))
01095 {
01096 if ((Rsp.reply = buildMgmtStatsReply(Msg.action + 9)) == 0)
01097 Rsp.status = DNX_REQ_NAK;
01098 }
01099 else if (!strcmp(Msg.action, "GETCONFIG"))
01100 {
01101 if ((Rsp.reply = buildMgmtCfgReply()) == 0)
01102 Rsp.status = DNX_REQ_NAK;
01103 }
01104 else if (!strcmp(Msg.action, "GETVERSION"))
01105 {
01106 if ((Rsp.reply = versionText(s_progname)) == 0)
01107 Rsp.status = DNX_REQ_NAK;
01108 }
01109 else if (!strcmp(Msg.action, "HELP"))
01110 {
01111 if ((Rsp.reply = buildHelpReply()) == 0)
01112 Rsp.status = DNX_REQ_NAK;
01113 }
01114
01115
01116 if ((ret = dnxSendMgmtReply(s_agent, &Rsp, Msg.address)) != 0)
01117 dnxLog("Agent response failure: %s.", dnxErrorString(ret));
01118
01119
01120 xfree(Rsp.reply);
01121 xfree(Msg.action);
01122
01123 }
01124 else if (ret != DNX_ERR_TIMEOUT)
01125 dnxLog("Agent channel failure: %s.", dnxErrorString(ret));
01126
01127 if (s_reconfig)
01128 {
01129 DnxCfgData * old;
01130
01131 dnxLog("Agent received RECONFIGURE request. Reconfiguring...");
01132
01133
01134 old = copyCfgData(&s_cfg);
01135 if ((ret = dnxCfgParserParse(s_parser, 0)) == 0)
01136 logGblConfigChanges(old, &s_cfg);
01137 if (old) freeCfgData(old);
01138 dnxLog("Reconfiguration: %s.", dnxErrorString(ret));
01139 s_reconfig = 0;
01140 }
01141 if (s_debugsig)
01142 {
01143 s_dbgflag ^= 1;
01144 dnxLog("Agent: Received DEBUGTOGGLE request. Debugging is %s.",
01145 s_dbgflag? "ENABLED" : "DISABLED");
01146 s_debugsig = 0;
01147 }
01148 if (s_shutdown)
01149 {
01150 dnxLog("Agent: Received SHUTDOWN request. Terminating...");
01151 break;
01152 }
01153 }
01154 if (ret == DNX_ERR_TIMEOUT)
01155 ret = 0;
01156 return ret;
01157 }
01158
01159
01160
01169 int main(int argc, char ** argv)
01170 {
01171 int ret;
01172
01173
01174
01175
01176 if ((ret = getOptions(argc, argv)) != DNX_OK
01177 || (ret = initConfig(s_cfgfile)) != DNX_OK)
01178 goto e0;
01179
01180
01181 dnxLogInit(s_cfg.logFilePath, s_cfg.debugFilePath, 0, &s_cfg.debugLevel);
01182
01183 dnxLog("-------- DNX Client Daemon Version %s Startup --------", VERSION);
01184 dnxLog("Copyright (c) 2006-2010 Intellectual Reserve. All rights reserved.");
01185 dnxLog("Configuration file: %s.", s_cfgfile);
01186 dnxLog("Dispatcher: %s.", s_cfg.wlm.dispatcherUrl);
01187 dnxLog("Collector: %s.", s_cfg.wlm.collectorUrl);
01188 dnxLog("Agent: %s.", s_cfg.agentUrl);
01189 if (s_cfg.debugFilePath && s_cfg.debugLevel != 0)
01190 {
01191 dnxLog("Debug logging enabled at level %d to %s.",
01192 s_cfg.debugLevel, s_cfg.debugFilePath);
01193 #if DEBUG_HEAP
01194 dnxLog("Debug heap is enabled.");
01195 #endif
01196 #if DEBUG_LOCKS
01197 dnxLog("Debug locks are enabled.");
01198 #endif
01199 }
01200
01201
01202 if ((ret = dnxPluginInit(s_cfg.pluginPath)) != DNX_OK)
01203 {
01204 dnxLog("Plugin init failed: %s.", dnxErrorString(ret));
01205 goto e1;
01206 }
01207
01208
01209 signal(SIGHUP, sighandler);
01210 signal(SIGINT, sighandler);
01211 signal(SIGQUIT, sighandler);
01212 signal(SIGABRT, sighandler);
01213 signal(SIGTERM, sighandler);
01214 signal(SIGPIPE, SIG_IGN);
01215 signal(SIGALRM, SIG_IGN);
01216 signal(SIGUSR1, sighandler);
01217 signal(SIGUSR2, SIG_IGN);
01218
01219
01220 if (!s_dbgflag && (ret = daemonize(s_progname)) != 0)
01221 goto e2;
01222
01223
01224 if ((ret = dropPrivileges()) != 0)
01225 goto e2;
01226
01227
01228 if (!s_dbgflag && (ret = createPidFile(s_progname)) != 0)
01229 goto e2;
01230
01231
01232 if ((ret = initClientComm()) != DNX_OK)
01233 {
01234 dnxLog("Communications init failed: %s.", dnxErrorString(ret));
01235 goto e3;
01236 }
01237
01238 if ((ret = dnxWlmCreate(&s_cfg.wlm, &s_wlm)) != 0)
01239 {
01240 dnxLog("Thread pool init failed: %s.", dnxErrorString(ret));
01241 goto e4;
01242 }
01243
01244
01245 ret = processCommands();
01246
01247
01248 dnxDebug(1, "Command-loop exited: %s.", dnxErrorString(ret));
01249
01250 dnxWlmDestroy(s_wlm);
01251 e4:releaseClientComm();
01252 e3:removePidFile(s_progname);
01253 e2:dnxPluginRelease();
01254 e1:releaseConfig();
01255 e0:dnxLog("-------- DNX Client Daemon Shutdown Complete --------");
01256
01257
01258
01259 xheapchk();
01260 closelog();
01261
01262 return ret;
01263 }
01264
01265
01266