dnxClientMain.c

Go to the documentation of this file.
00001 /*--------------------------------------------------------------------------
00002 
00003    Copyright (c) 2006-2007, Intellectual Reserve, Inc. All rights reserved.
00004 
00005    This program is free software; you can redistribute it and/or modify
00006    it under the terms of the GNU General Public License version 2 as
00007    published by the Free Software Foundation.
00008 
00009    This program is distributed in the hope that it will be useful,
00010    but WITHOUT ANY WARRANTY; without even the implied warranty of
00011    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012    GNU General Public License for more details.
00013 
00014    You should have received a copy of the GNU General Public License
00015    along with this program; if not, write to the Free Software
00016    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00017 
00018   --------------------------------------------------------------------------*/
00019 
00032 #include "dnxError.h"
00033 #include "dnxDebug.h"
00034 #include "dnxTransport.h"
00035 #include "dnxClntProt.h"
00036 #include "dnxCfgParser.h"
00037 #include "dnxWLM.h"
00038 #include "dnxPlugin.h"
00039 #include "dnxLogging.h"
00040 
00041 #include <sys/types.h>
00042 #include <sys/stat.h>
00043 #include <sys/wait.h>
00044 #include <sys/file.h>
00045 #include <stdio.h>
00046 #include <stdlib.h>
00047 #include <stdint.h>
00048 #include <stdarg.h>
00049 #include <unistd.h>
00050 #include <string.h>
00051 #include <signal.h>
00052 #include <pthread.h>
00053 #include <fcntl.h>
00054 #include <errno.h>
00055 #include <pwd.h>
00056 #include <grp.h>
00057 
00058 #if HAVE_CONFIG_H
00059 # include "config.h"
00060 #else
00061 # define VERSION           "<unknown>"
00062 # define PACKAGE_BUGREPORT "<unknown>"
00063 #endif
00064 
00065 #if HAVE_GETOPT_LONG
00066 #define _GNU_SOURCE
00067 #include <getopt.h>
00068 #endif
00069 
// Build-time fallbacks. Each of these may already be defined (for example by
// config.h or on the compiler command line); the values here apply only when
// it is not.
#if !defined(SYSCONFDIR)
#define SYSCONFDIR      "/etc"
#endif

#if !defined(SYSLOGDIR)
#define SYSLOGDIR       "/var/log"
#endif

#if !defined(SYSRUNPATH)
#define SYSRUNPATH      "/var/run/dnx"
#endif

#if !defined(LIBEXECDIR)
#define LIBEXECDIR      "/usr/lib"
#endif

#if !defined(DNXUSER)
#define DNXUSER         "nagios"
#endif

#if !defined(DNXGROUP)
#define DNXGROUP        "nagios"
#endif

#if !defined(COMPILE_FLAGS)
#define COMPILE_FLAGS   "<unknown>"
#endif

// Default configuration values, assembled from the settings above by
// string-literal concatenation.
#define DNX_DEFAULT_NODE_CONFIG_FILE   SYSCONFDIR "/dnxClient.cfg"
#define DNX_DEFAULT_LOG                SYSLOGDIR "/dnxcld.log"
#define DNX_DEFAULT_DBGLOG             SYSLOGDIR "/dnxcld.debug.log"
#define DNX_DEFAULT_PLUGIN_PATH        LIBEXECDIR
#define DNX_DEFAULT_RUN_PATH           SYSRUNPATH
#define DNX_DEFAULT_USER               DNXUSER
#define DNX_DEFAULT_GROUP              DNXGROUP

// Number of elements in a true array (not valid for a pointer).
#define elemcount(x) (sizeof(x)/sizeof(*(x)))
00108 
00109 // client configuration data
00110 typedef struct DnxCfgData
00111 {
00112    char * agentUrl;              
00113    char * pluginPath;            
00114    char * user;                  
00115    char * group;                 
00116    char * runPath;               
00117    char * logFilePath;           
00118    char * debugFilePath;         
00119    unsigned debugLevel;          
00120    DnxWlmCfgData wlm;            
00121 } DnxCfgData;
00122 
00123 // module statics
00124 static DnxCfgData s_cfg;         
00125 static DnxCfgParser * s_parser;  
00126 static DnxWlm * s_wlm = 0;       
00127 static DnxChannel * s_agent;     
00128 static char * s_progname;        
00129 static char * s_cfgfile;         
00130 static char * s_cmdover = 0;     
00131 static int s_dbgflag = 0;        
00132 static int s_shutdown = 0;       
00133 static int s_reconfig = 0;       
00134 static int s_debugsig = 0;       
00135 static int s_lockfd = -1;        
00136 
00137 //extern DCS * gTopDCS;
00138 
00139 //----------------------------------------------------------------------------
00140 
00149 static char * versionText(char * base)
00150 {
00151    char buf[1024];
00152    snprintf(buf, sizeof(buf) - 1,
00153       "\n"
00154       "  %s Version " VERSION ", Built " __DATE__ " at " __TIME__ ".\n"
00155       "  Distributed Nagios eXecutor (DNX) Client Daemon.\n"
00156       "  Please report bugs to <" PACKAGE_BUGREPORT ">.\n"
00157       "\n"
00158       "  Default configuration:\n"
00159       "    Default config file: "      DNX_DEFAULT_NODE_CONFIG_FILE "\n"
00160       "    Default log file: "         DNX_DEFAULT_LOG "\n"
00161       "    Default debug log file: "   DNX_DEFAULT_DBGLOG "\n"
00162       "    Default system run path: "  DNX_DEFAULT_RUN_PATH "\n"
00163       "    Default daemon user: "      DNX_DEFAULT_USER "\n"
00164       "    Default daemon group: "     DNX_DEFAULT_GROUP "\n"
00165 //    "    Compile flags: "            COMPILE_FLAGS "\n"
00166 #if DEBUG_HEAP
00167       "    Debug heap is ENABLED.\n"
00168 #endif
00169 #if DEBUG_LOCKS
00170       "    Debug locks are ENABLED.\n"
00171 #endif
00172       , base
00173    );
00174    return xstrdup(buf);
00175 }
00176 
00177 //----------------------------------------------------------------------------
00178 
/** Print the version banner, followed by a newline, to a stream.
 *
 * Nothing is printed when versionText returns null.
 *
 * @param[in] fp - the stream to write to.
 * @param[in] base - the program name passed through to versionText.
 */
static void version(FILE * fp, char * base)
{
   char * text = versionText(base);

   if (text == 0)
      return;

   fprintf(fp, "%s\n", text);
   xfree(text);
}
00194 
00195 //----------------------------------------------------------------------------
00196 
/** Print the version banner and the option summary to stderr, then exit.
 *
 * This function does not return; it ends the process with exit(-1).
 *
 * @param[in] base - the program name shown in the banner and usage line.
 */
static void usage(char * base)
{
   // Long option spellings are shown only when getopt_long is available;
   // otherwise the OL_* macros expand to nothing.
#if HAVE_GETOPT_LONG
#define OL_CFGFILE  ", --cfgfile "
#define OL_LOGFILE  ", --logfile "
#define OL_DBGFILE  ", --dbgfile "
#define OL_DBGLEVEL ", --dbglevel"
#define OL_DEBUG    ", --debug   "
#define OL_RUNPATH  ", --runpath "
#define OL_USER     ", --user    "
#define OL_GROUP    ", --group   "
#define OL_VERSION  ", --version "
#define OL_HELP     ", --help    "
#else
#define OL_CFGFILE
#define OL_LOGFILE
#define OL_DBGFILE
#define OL_DBGLEVEL
#define OL_DEBUG
#define OL_RUNPATH
#define OL_USER
#define OL_GROUP
#define OL_VERSION
#define OL_HELP
#endif

   version(stderr, base);

   // only the first line needs formatting; the rest is constant text
   fprintf(stderr, "  Usage: %s [options]\n", base);
   fputs(
      "    Where [options] are:\n"
      "      -c" OL_CFGFILE  " <file>   specify the file and path of the config file.\n"
      "      -l" OL_LOGFILE  " <file>   specify the file and path of the log file.\n"
      "      -D" OL_DBGFILE  " <file>   specify the file and path of the debug log file.\n"
      "      -g" OL_DBGLEVEL " <value>  specify the level of debugging output.\n"
      "      -d" OL_DEBUG    "          enable debug mode (will not become a daemon).\n"
      "      -r" OL_RUNPATH  " <path>   specify the path of the lock/pid file.\n"
      "      -U" OL_USER     " <user>   specify the DNX client user name or id.\n"
      "      -G" OL_GROUP    " <group>  specify the DNX client group name or id.\n"
      "      -v" OL_VERSION  "          display DNX client version and exit.\n"
      "      -h" OL_HELP     "          display this help screen and exit.\n"
      "\n",
      stderr);

   exit(-1);
}
00247 
00248 //----------------------------------------------------------------------------
00249 
00260 static int appendString(char ** spp, char * fmt, ... )
00261 {
00262    char buf[1024];
00263    char * newstr;
00264    size_t strsz;
00265    va_list ap;
00266 
00267    // build new string
00268    va_start(ap, fmt);
00269    vsnprintf(buf, sizeof buf, fmt, ap);
00270    va_end(ap);
00271 
00272    // reallocate buffer; initialize if necessary
00273    strsz = strlen(buf) + 1;
00274    if ((newstr = xrealloc(*spp, (*spp? strlen(*spp): 0) + strsz)) == 0) return DNX_ERR_MEMORY;
00275    if (*spp == 0)
00276       *newstr = 0;
00277 
00278    // concatenate new string onto exiting string; return updated pointer
00279    strcat(newstr, buf);
00280    *spp = newstr;
00281    return 0;
00282 }
00283 
00284 //----------------------------------------------------------------------------
00285 
00293 static int getOptions(int argc, char ** argv)
00294 {
00295 // extern int optind;
00296    extern char * optarg;
00297    extern int opterr, optopt;
00298 
00299    static char opts[] = "c:dr:g:l:D:U:G:vh";
00300 
00301 #if HAVE_GETOPT_LONG
00302    static struct option longopts[] =
00303    {
00304       { "cfgfile",  required_argument, 0, 'c' },
00305       { "logfile",  required_argument, 0, 'l' },
00306       { "dbgfile",  required_argument, 0, 'D' },
00307       { "dbglevel", required_argument, 0, 'g' },
00308       { "debug",    no_argument,       0, 'd' },
00309       { "runpath",  required_argument, 0, 'r' },
00310       { "version",  no_argument,       0, 'v' },
00311       { "user",     required_argument, 0, 'U' },
00312       { "group",    required_argument, 0, 'G' },
00313       { "help",     no_argument,       0, 'h' },
00314       { 0, 0, 0, 0 },
00315    };
00316 #endif
00317 
00318    int ch;
00319    char * cp;
00320    char * logfile = 0;
00321    char * dbgfile = 0;
00322    char * dbglvl = 0;
00323    char * user = 0;
00324    char * group = 0;
00325    char * runpath = 0;
00326    size_t rplen;
00327 
00328    // set program base name
00329    s_progname = (char *)((cp = strrchr(argv[0], '/')) != 0 ? (cp + 1) : argv[0]);
00330 
00331    opterr = 0; /* Disable error messages */
00332 
00333 #if HAVE_GETOPT_LONG
00334    while ((ch = getopt_long(argc, argv, opts, longopts, 0)) != -1)
00335 #else
00336    while ((ch = getopt(argc, argv, opts)) != -1)
00337 #endif
00338    {
00339       switch (ch)
00340       {
00341          case 'c': s_cfgfile = optarg; break;
00342          case 'd': s_dbgflag = 1;      break;
00343          case 'r': runpath   = optarg; break;
00344          case 'U': user      = optarg; break;
00345          case 'G': group     = optarg; break;
00346          case 'g': dbglvl    = optarg; break;
00347          case 'l': logfile   = optarg; break;
00348          case 'D': dbgfile   = optarg; break;
00349          case 'v': version(stdout, s_progname); exit(0);
00350          case 'h':
00351          default : usage(s_progname);
00352       }
00353    }
00354 
00355    if (!s_cfgfile)
00356       s_cfgfile = DNX_DEFAULT_NODE_CONFIG_FILE;
00357 
00358    if (runpath && (rplen = strlen(runpath)) != 0 && runpath[rplen - 1] == '/')
00359       runpath[rplen - 1] = 0;
00360 
00361    if (s_dbgflag)
00362       appendString(&s_cmdover, "logFile=STDOUT\ndebugFile=STDOUT\n");
00363 
00364    if (logfile)
00365       appendString(&s_cmdover, "logFile=%s\n", logfile);
00366 
00367    if (dbgfile)
00368       appendString(&s_cmdover, "debugFile=%s\n", dbgfile);
00369 
00370    if (dbglvl)
00371       appendString(&s_cmdover, "debugLevel=%s\n", dbglvl);
00372 
00373    if (user)
00374       appendString(&s_cmdover, "user=%s\n", user);
00375 
00376    if (group)
00377       appendString(&s_cmdover, "group=%s\n", group);
00378 
00379    if (runpath)
00380       appendString(&s_cmdover, "runPath=%s\n", runpath);
00381 
00382    return 0;
00383 }
00384 
00385 //----------------------------------------------------------------------------
00386 
00398 static int validateCfg(DnxCfgDict * dict, void ** vptrs, void * passthru)
00399 {
00400    int ret = DNX_ERR_INVALID;
00401    DnxCfgData cfg;
00402 
00403    // setup data structure so we can use the same functionality we had before
00404    // NOTE: The order of the vptrs is defined by the order of the dictionary.
00405    cfg.wlm.dispatcherUrl = (char *)            vptrs[ 0];
00406    cfg.wlm.collectorUrl  = (char *)            vptrs[ 1];
00407    cfg.wlm.poolInitial   = (unsigned)(intptr_t)vptrs[ 2];
00408    cfg.wlm.poolMin       = (unsigned)(intptr_t)vptrs[ 3];
00409    cfg.wlm.poolMax       = (unsigned)(intptr_t)vptrs[ 4];
00410    cfg.wlm.poolGrow      = (unsigned)(intptr_t)vptrs[ 5];
00411    cfg.wlm.pollInterval  = (unsigned)(intptr_t)vptrs[ 6];
00412    cfg.wlm.shutdownGrace = (unsigned)(intptr_t)vptrs[ 7];
00413    cfg.wlm.showNodeAddr  = (unsigned)(intptr_t)vptrs[ 8];
00414    cfg.wlm.reqTimeout    = (unsigned)(intptr_t)vptrs[ 9];
00415    cfg.wlm.maxRetries    = (unsigned)(intptr_t)vptrs[10];
00416    cfg.wlm.ttlBackoff    = (unsigned)(intptr_t)vptrs[11];
00417    cfg.wlm.maxResults    = (unsigned)(intptr_t)vptrs[12];
00418    cfg.agentUrl          = (char *)            vptrs[13];
00419    cfg.pluginPath        = (char *)            vptrs[14];
00420    cfg.user              = (char *)            vptrs[15];
00421    cfg.group             = (char *)            vptrs[16];
00422    cfg.runPath           = (char *)            vptrs[17];
00423    cfg.logFilePath       = (char *)            vptrs[18];
00424    cfg.debugFilePath     = (char *)            vptrs[19];
00425    cfg.debugLevel        = (unsigned)(intptr_t)vptrs[20];
00426 
00427    if (!cfg.wlm.dispatcherUrl)
00428       dnxLog("config: Missing channelDispatcher parameter.");
00429    else if (!cfg.wlm.collectorUrl)
00430       dnxLog("config: Missing channelCollector parameter.");
00431    if (cfg.wlm.poolInitial < 1 || cfg.wlm.poolInitial > cfg.wlm.poolMax)
00432       dnxLog("config: Invalid poolInitial parameter.");
00433    else if (cfg.wlm.poolMin < 1 || cfg.wlm.poolMin > cfg.wlm.poolMax)
00434       dnxLog("config: Invalid poolMin parameter.");
00435    else if (cfg.wlm.poolGrow < 1 || cfg.wlm.poolGrow >= cfg.wlm.poolMax)
00436       dnxLog("config: Invalid poolGrow parameter.");
00437    else if (cfg.wlm.pollInterval < 1)
00438       dnxLog("config: Invalid wlmPollInterval parameter.");
00439    else if (cfg.wlm.shutdownGrace < 0)
00440       dnxLog("config: Invalid wlmShutdownGracePeriod parameter.");
00441    else if (cfg.wlm.reqTimeout < 1 || cfg.wlm.reqTimeout <= cfg.wlm.ttlBackoff)
00442       dnxLog("config: Invalid threadRequestTimeout parameter.");
00443    else if (cfg.wlm.ttlBackoff >= cfg.wlm.reqTimeout)
00444       dnxLog("config: Invalid threadTtlBackoff parameter.");
00445    else if (cfg.wlm.maxResults < 1024)
00446       dnxLog("config: Invalid maxResultBuffer parameter.");
00447    else
00448       ret = 0;
00449 
00450    return ret;
00451 }
00452 
00453 //----------------------------------------------------------------------------
00454 
00456 static void releaseConfig(void)
00457 {
00458    dnxCfgParserDestroy(s_parser);
00459    xfree(s_cmdover);
00460    s_cmdover = 0;
00461 }
00462 
00463 //----------------------------------------------------------------------------
00464 
00470 static int initConfig(char * cfgfile)
00471 {
00472    DnxCfgDict dict[] =
00473    {  // Do NOT change the order, unless you know what you're doing!
00474       { "channelDispatcher",      DNX_CFG_URL,      &s_cfg.wlm.dispatcherUrl },
00475       { "channelCollector",       DNX_CFG_URL,      &s_cfg.wlm.collectorUrl  },
00476       { "poolInitial",            DNX_CFG_UNSIGNED, &s_cfg.wlm.poolInitial   },
00477       { "poolMin",                DNX_CFG_UNSIGNED, &s_cfg.wlm.poolMin       },
00478       { "poolMax",                DNX_CFG_UNSIGNED, &s_cfg.wlm.poolMax       },
00479       { "poolGrow",               DNX_CFG_UNSIGNED, &s_cfg.wlm.poolGrow      },
00480       { "wlmPollInterval",        DNX_CFG_UNSIGNED, &s_cfg.wlm.pollInterval  },
00481       { "wlmShutdownGracePeriod", DNX_CFG_UNSIGNED, &s_cfg.wlm.shutdownGrace },
00482       { "showNodeAddr",           DNX_CFG_BOOL,     &s_cfg.wlm.showNodeAddr  },
00483       { "threadRequestTimeout",   DNX_CFG_UNSIGNED, &s_cfg.wlm.reqTimeout    },
00484       { "threadMaxRetries",       DNX_CFG_UNSIGNED, &s_cfg.wlm.maxRetries    },
00485       { "threadTtlBackoff",       DNX_CFG_UNSIGNED, &s_cfg.wlm.ttlBackoff    },
00486       { "maxResultBuffer",        DNX_CFG_UNSIGNED, &s_cfg.wlm.maxResults    },
00487       { "channelAgent",           DNX_CFG_URL,      &s_cfg.agentUrl          },
00488       { "pluginPath",             DNX_CFG_FSPATH,   &s_cfg.pluginPath        },
00489       { "user",                   DNX_CFG_STRING,   &s_cfg.user              },
00490       { "group",                  DNX_CFG_STRING,   &s_cfg.group             },
00491       { "runPath",                DNX_CFG_FSPATH,   &s_cfg.runPath           },
00492       { "logFile",                DNX_CFG_FSPATH,   &s_cfg.logFilePath       },
00493       { "debugFile",              DNX_CFG_FSPATH,   &s_cfg.debugFilePath     },
00494       { "debugLevel",             DNX_CFG_UNSIGNED, &s_cfg.debugLevel        },
00495       { 0 },
00496    };
00497    char cfgdefs[] =
00498       "channelDispatcher = udp://0:12480\n"
00499       "channelAgent = udp://0:12482\n"
00500       "poolInitial = 20\n"
00501       "poolMin = 20\n"
00502       "poolMax = 100\n"
00503       "poolGrow = 10\n"
00504       "wlmPollInterval = 2\n"
00505       "wlmShutdownGracePeriod = 35\n"
00506       "showNodeAddr = Yes\n"
00507       "threadRequestTimeout = 5\n"
00508       "threadMaxRetries = 12\n"
00509       "threadTtlBackoff = 1\n"
00510       "maxResultBuffer = 1024\n"
00511       "pluginPath = " DNX_DEFAULT_PLUGIN_PATH "\n"
00512       "user = " DNX_DEFAULT_USER "\n"
00513       "group = " DNX_DEFAULT_GROUP "\n"
00514       "runPath = " DNX_DEFAULT_RUN_PATH "\n"
00515       "logFile = " DNX_DEFAULT_LOG "\n"
00516       "debugFile = " DNX_DEFAULT_DBGLOG "\n";
00517 
00518    int ret;
00519 
00520    // create global configuration parser object
00521    if ((ret = dnxCfgParserCreate(cfgdefs, s_cfgfile, s_cmdover, dict,
00522          validateCfg, &s_parser)) != 0)
00523       return ret;
00524 
00525    // parse config file
00526    if ((ret = dnxCfgParserParse(s_parser, 0)) != 0)
00527       releaseConfig();
00528 
00529    return ret;
00530 }
00531 
00532 //----------------------------------------------------------------------------
00533 
00538 static int initClientComm(void)
00539 {
00540    int ret;
00541 
00542    s_agent = 0;
00543 
00544    // initialize the DNX comm stack
00545    if ((ret = dnxChanMapInit(0)) != DNX_OK)
00546    {
00547       dnxLog("Failed to initialize channel map: %s.", dnxErrorString(ret));
00548       return ret;
00549    }
00550 
00551    // create a channel for receiving DNX Client Requests
00552    //    (e.g., Shutdown, Status, etc.)
00553    if ((ret = dnxChanMapAdd("Agent", s_cfg.agentUrl)) != DNX_OK)
00554    {
00555       dnxLog("Failed to initialize AGENT channel: %s.", dnxErrorString(ret));
00556       dnxChanMapRelease();
00557       return ret;
00558    }
00559 
00560    // attempt to open the Agent channel
00561    if ((ret = dnxConnect("Agent", DNX_MODE_PASSIVE, &s_agent)) != DNX_OK)
00562    {
00563       dnxLog("Failed to open AGENT channel: %s.", dnxErrorString(ret));
00564       dnxChanMapDelete("Agent");
00565       dnxChanMapRelease();
00566       return ret;
00567    }
00568    return 0;
00569 }
00570 
00571 //----------------------------------------------------------------------------
00572 
00574 static void releaseClientComm(void)
00575 {
00576    dnxDisconnect(s_agent);
00577    dnxChanMapDelete("Agent");
00578    dnxChanMapRelease();
00579 }
00580 
00581 //----------------------------------------------------------------------------
00582 
00587 static void sighandler(int sig)
00588 {
00589    switch(sig)
00590    {
00591       case SIGHUP:   s_reconfig = 1;   break;
00592       case SIGUSR1:  s_debugsig = 1;   break;
00593       default:       s_shutdown = 1;
00594       break;
00595    }
00596 }
00597 
00598 //----------------------------------------------------------------------------
00599 
00606 static int createPidFile(char * base)
00607 {
00608    char lockFile[1024];
00609    char szPid[32];
00610 
00611    // create lock-file name
00612    sprintf(lockFile, "%s/%s.pid", s_cfg.runPath, base);
00613 
00614    // open the lock file
00615    if ((s_lockfd = open(lockFile, O_RDWR | O_CREAT, 0644)) < 0)
00616    {
00617       dnxLog("Unable to create lock file, %s: %s.", lockFile, strerror(errno));
00618       return (-1);
00619    }
00620 
00621    // attempt to lock the lock-file
00622    if (flock(s_lockfd, LOCK_EX | LOCK_NB) != 0)
00623    {
00624       close(s_lockfd);
00625       dnxLog("Lock file already in-use: %s: %s.", lockFile, strerror(errno));
00626       return (-1);
00627    }
00628 
00629    // create a string containing our PID
00630    sprintf(szPid, "%d\n", getpid());
00631 
00632    // write our PID to the lock file
00633    if (write(s_lockfd, szPid, strlen(szPid)) != strlen(szPid))
00634    {
00635       close(s_lockfd);
00636       dnxLog("Failed to write pid to lock file, %s: %s.",
00637             lockFile, strerror(errno));
00638       return (-1);
00639    }
00640    return 0;
00641 }
00642 
00643 //----------------------------------------------------------------------------
00644 
00649 static void removePidFile(char * base)
00650 {
00651    char lockFile[1024];
00652 
00653    // create lock-file name
00654    sprintf(lockFile, "%s/%s.pid", s_cfg.runPath, base);
00655 
00656    // remove the lock file - we do this before closing it in order to prevent
00657    //    race conditions between the closing and removing operations.
00658    unlink(lockFile);
00659 
00660    // close/unlock the lock file
00661    if (s_lockfd >= 0) close(s_lockfd);
00662 }
00663 
00664 //----------------------------------------------------------------------------
00665 
00672 static int daemonize(char * base)
00673 {
00674    int pid, fd;
00675 
00676    // fork to allow parent process to exit
00677    if ((pid = fork()) < 0)
00678    {
00679       dnxLog("Failed 1st fork: %s.", strerror(errno));
00680       return -1;
00681    }
00682    else if (pid != 0)
00683       exit(0);
00684 
00685    // become process group leader
00686    setsid();
00687 
00688    // fork again to allow process group leader to exit
00689    if ((pid = fork()) < 0)
00690    {
00691       dnxLog("Failed 2nd fork: %s.", strerror(errno));
00692       return -1;
00693    }
00694    else if (pid != 0)
00695       exit(0);
00696 
00697    // change working directory to root so as to not keep any file systems open
00698    chdir("/");
00699 
00700    // change working directory back to specified directory from config file.
00701    chdir(s_cfg.runPath);
00702    dnxLog("Changed working directory to %s", s_cfg.runPath);
00703    // allow us complete control over any newly created files
00704    umask(0);
00705 
00706    // close and redirect stdin, stdout, stderr
00707    fd = open("/dev/null", O_RDWR);
00708    dup2(fd, 0);
00709    dup2(fd, 1);
00710    dup2(fd, 2); //Comment this line to make stderr not point to dev null.
00711 
00712    return 0;   // continue execution as a daemon
00713 }
00714 
00715 //----------------------------------------------------------------------------
00716 
00721 static int dropPrivileges(void)
00722 {
00723    // drop privileges if running as root
00724    if (getuid() == 0)
00725    {
00726       struct passwd * pwd;
00727       struct group * grp;
00728       uid_t uid;
00729       gid_t gid;
00730 
00731       dnxLog("Running as root; attempting to drop privileges...");
00732 
00733       if ((pwd = getpwnam(s_cfg.user)) != 0)
00734          uid = pwd->pw_uid;
00735       else
00736       {
00737          char * ep;
00738          uid = (uid_t)strtoul(s_cfg.user, &ep, 10);
00739          if (s_cfg.user + strlen(s_cfg.user) > ep)
00740          {
00741             dnxLog("Invalid user name or id specified: %s.", s_cfg.user);
00742             return -1;
00743          }
00744       }
00745 
00746       if ((grp = getgrnam(s_cfg.group)) != 0)
00747          gid = grp->gr_gid;
00748       else
00749       {
00750          char * ep;
00751          gid = (gid_t)strtoul(s_cfg.group, &ep, 10);
00752          if (s_cfg.group + strlen(s_cfg.group) > ep)
00753          {
00754             dnxLog("Invalid group name or id specified: %s.", s_cfg.group);
00755             return -1;
00756          }
00757       }
00758 
00759       // drop privileges if root user not requested
00760       if (uid != 0)
00761       {
00762          int ret;
00763          if ((ret = setgid(gid)) == -1 || (ret = setuid(uid)) == -1)
00764          {
00765             dnxLog("Failed to drop privileges: %s. Terminating.", strerror(errno));
00766             return -1;
00767          }
00768 
00769          grp = getgrgid(getgid());
00770          pwd = getpwuid(getuid());
00771 
00772          assert(grp && pwd);
00773 
00774          dnxLog("Privileges dropped to %s:%s.", pwd->pw_name, grp->gr_name);
00775       }
00776       else
00777          dnxLog("Root user requested; oh well...");
00778    }
00779    return 0;
00780 }
00781 
00782 //----------------------------------------------------------------------------
00783 
00792 static void logGblConfigChanges(DnxCfgData * ocp, DnxCfgData * ncp)
00793 {
00794    if (strcmp(ocp->agentUrl, ncp->agentUrl) != 0)
00795       dnxLog("Config parameter 'channelAgent' changed from %s to %s. "
00796             "NOTE: Changing the agent URL requires a restart.",
00797             ocp? ocp->agentUrl: "<unknown>", ncp->agentUrl);
00798 
00799    if (strcmp(ocp->logFilePath, ncp->logFilePath) != 0)
00800       dnxLog("Config parameter 'logFile' changed from %s to %s. "
00801             "NOTE: Changing the log file path requires a restart.",
00802             ocp? ocp->logFilePath: "<unknown>", ncp->logFilePath);
00803 
00804    if (strcmp(ocp->debugFilePath, ncp->debugFilePath) != 0)
00805       dnxLog("Config parameter 'debugFile' changed from %s to %s. "
00806             "NOTE: Changing the debug log file path requires a restart.",
00807             ocp? ocp->debugFilePath: "<unknown>", ncp->debugFilePath);
00808 
00809    if (strcmp(ocp->pluginPath, ncp->pluginPath) != 0)
00810       dnxLog("Config parameter 'pluginPath' changed from %s to %s.",
00811             ocp? ocp->pluginPath: "<unknown>", ncp->pluginPath);
00812 
00813    if (strcmp(ocp->user, ncp->user) != 0)
00814       dnxLog("Config parameter 'user' changed from %s to %s. "
00815             "NOTE: Changing the dnx user requires a restart.",
00816             ocp? ocp->user: "<unknown>", ncp->user);
00817 
00818    if (strcmp(ocp->group, ncp->group) != 0)
00819       dnxLog("Config parameter 'group' changed from %s to %s. "
00820             "NOTE: Changing the dnx group requires a restart.",
00821             ocp? ocp->group: "<unknown>", ncp->group);
00822 
00823    if (strcmp(ocp->runPath, ncp->runPath) != 0)
00824       dnxLog("Config parameter 'runPath' changed from %s to %s. "
00825             "NOTE: Changing the dnx pid/lock file directory requires a restart.",
00826             ocp? ocp->runPath: "<unknown>", ncp->runPath);
00827 
00828    if (ocp->debugLevel != ncp->debugLevel)
00829       dnxLog("Config parameter 'debugLevel' changed from %u to %u.",
00830             ocp? ocp->debugLevel: 0, ncp->debugLevel);
00831 }
00832 
00833 //----------------------------------------------------------------------------
00834 
/** Build the reply text for the HELP management command.
 *
 * @return The xstrdup copy of the fixed help text, returned unchecked.
 */
static char * buildHelpReply(void)
{
   static char helpText[] =
         "DNX Client Management Commands:\n"
         "  SHUTDOWN\n"
         "  RECONFIGURE\n"
         "  DEBUGTOGGLE\n"
         "  RESETSTATS\n"
         "  GETSTATS stat-list\n"
         "    stat-list is a comma-delimited list of stat names:\n"
         "      jobs_handled  - number of jobs completed successful or otherwise\n"
         "      jobs_ok       - number of successful jobs\n"
         "      jobs_failed   - number of unsuccessful jobs\n"
         "      th_created    - number of threads created\n"
         "      th_destroyed  - number of threads destroyed\n"
         "      th_exist      - number of threads currently in existence\n"
         "      th_active     - number of threads currently active\n"
         "      req_sent      - number of requests sent to DNX server\n"
         "      jobs_rcvd     - number of jobs received from DNX server\n"
         "      min_exec_tm   - minimum job execution time\n"
         "      avg_exec_tm   - average job execution time\n"
         "      max_exec_tm   - maximum job execution time\n"
         "      avg_th_exist  - average threads in existence\n"
         "      avg_th_active - average threads processing jobs\n"
         "      thread_tm     - total thread life time\n"
         "      job_tm        - total job processing time\n"
         "    Note: Stats are returned in the order they are requested.\n"
         "          Unrecognized requested stats are returned as '?'.\n"
         "  GETCONFIG\n"
         "  GETVERSION\n"
         "  HELP";

   return xstrdup(helpText);
}
00872 
00873 //----------------------------------------------------------------------------
00874 
00881 static char * buildMgmtStatsReply(char * req)
00882 {
00883    char * rsp = 0;
00884    unsigned jobs_handled;
00885    DnxWlmStats ws;
00886 
00887    struct { char * str; unsigned * stat; } rs[] =
00888    {
00889       { "jobs_handled",   &jobs_handled          },
00890       { "jobs_ok",        &ws.jobs_succeeded     },
00891       { "jobs_failed",    &ws.jobs_failed        },
00892       { "th_created",     &ws.threads_created    },
00893       { "th_destroyed",   &ws.threads_destroyed  },
00894       { "th_exist",       &ws.total_threads      },
00895       { "th_active",      &ws.active_threads     },
00896       { "req_sent",       &ws.requests_sent      },
00897       { "jobs_rcvd",      &ws.jobs_received      },
00898       { "min_exec_tm",    &ws.min_exec_time      },
00899       { "avg_exec_tm",    &ws.avg_exec_time      },
00900       { "max_exec_tm",    &ws.max_exec_time      },
00901       { "avg_th_exist",   &ws.avg_total_threads  },
00902       { "avg_th_active",  &ws.avg_active_threads },
00903       { "thread_tm",      &ws.thread_time        },
00904       { "job_tm",         &ws.job_time           },
00905    };
00906 
00907    assert(req);
00908 
00909    // gather stats for display
00910    dnxWlmGetStats(s_wlm, &ws);
00911    jobs_handled = ws.jobs_succeeded + ws.jobs_failed;
00912 
00913    // trim leading ws
00914    while (isspace(*req)) req++;
00915 
00916    while (*req)
00917    {
00918       char * ep, * np;
00919       unsigned i;
00920 
00921       // find start of next string or end
00922       if ((np = strchr(req, ',')) == 0)
00923          np = req + strlen(req);
00924 
00925       // trim trailing ws
00926       ep = np;
00927       while (ep > req && isspace(ep[-1])) ep--;
00928 
00929       // search table for sub-string, append requested stat to rsp
00930       for (i = 0; i < elemcount(rs); i++)
00931          if (memcmp(req, rs[i].str, ep - req) == 0)
00932          {
00933             if (appendString(&rsp, "%u,", *rs[i].stat) != 0)
00934                return xfree(rsp), (char *)0;
00935             break;
00936          }
00937 
00938       // check for unknown stat
00939       if (i == elemcount(rs) && appendString(&rsp, "?,") != 0)
00940          return xfree(rsp), (char *)0;
00941 
00942       // move to next sub-string or end
00943       if (*(req = np)) req++;
00944 
00945       // trim leading ws
00946       while (isspace(*req)) req++;
00947    }
00948 
00949    // remove trailing comma in non-empty lists
00950    if (rsp)
00951    {
00952       size_t len = strlen(rsp);
00953       if (len && rsp[len - 1] == ',') rsp[len - 1] = 0;
00954    }
00955    return rsp;
00956 }
00957 
00958 //----------------------------------------------------------------------------
00959 
00964 static char * buildMgmtCfgReply(void)
00965 {
00966    char * buf;
00967    size_t bufsz = 0;
00968 
00969    if (dnxCfgParserGetCfg(s_parser, 0, &bufsz) != 0)
00970       return 0;
00971 
00972    if ((buf = (char *)xmalloc(bufsz)) != 0)
00973       if (dnxCfgParserGetCfg(s_parser, buf, &bufsz) != 0)
00974          xfree(buf), (buf = 0);
00975 
00976    return buf;
00977 }
00978 
00979 //----------------------------------------------------------------------------
00980 
00985 static void freeCfgData(DnxCfgData * cpy)
00986 {
00987    xfree(cpy->agentUrl);
00988    xfree(cpy->logFilePath);
00989    xfree(cpy->debugFilePath);
00990    xfree(cpy->pluginPath);
00991    xfree(cpy->user);
00992    xfree(cpy->group);
00993    xfree(cpy->runPath);
00994    xfree(cpy->wlm.dispatcherUrl);
00995    xfree(cpy->wlm.collectorUrl);
00996    xfree(cpy);
00997    cpy = 0;
00998 }
00999 
01000 //----------------------------------------------------------------------------
01001 
01008 static DnxCfgData * copyCfgData(DnxCfgData * org)
01009 {
01010    DnxCfgData * cpy;
01011 
01012    // make new config structure
01013    if ((cpy = (DnxCfgData *)xmalloc(sizeof *cpy)) == 0)
01014       return 0;
01015 
01016    // copy all values
01017    *cpy = *org;
01018 
01019    // attempt to make string buffer copies
01020    cpy->agentUrl = xstrdup(org->agentUrl);
01021    cpy->logFilePath = xstrdup(org->logFilePath);
01022    cpy->debugFilePath = xstrdup(org->debugFilePath);
01023    cpy->pluginPath = xstrdup(org->pluginPath);
01024    cpy->user = xstrdup(org->user);
01025    cpy->group = xstrdup(org->group);
01026    cpy->runPath = xstrdup(org->runPath);
01027    cpy->wlm.dispatcherUrl = xstrdup(org->wlm.dispatcherUrl);
01028    cpy->wlm.collectorUrl = xstrdup(org->wlm.collectorUrl);
01029 
01030    // if any buffer copies failed, free everything, return 0
01031    if (cpy->agentUrl == 0 || cpy->logFilePath == 0
01032          || cpy->debugFilePath == 0 || cpy->pluginPath == 0
01033          || cpy->user == 0 || cpy->group == 0 || cpy->runPath == 0
01034          || cpy->wlm.dispatcherUrl == 0 || cpy->wlm.collectorUrl == 0)
01035       freeCfgData(cpy), cpy = 0;
01036 
01037    return cpy;
01038 }
01039 
01040 //----------------------------------------------------------------------------
01041 
01046 static int processCommands(void)
01047 {
01048    int ret;
01049    DnxMgmtRequest Msg;
01050    Msg.action = 0;
01051    dnxLog("DNX Client Agent awaiting commands...");
01052 
01053    while (1)
01054    {
01055 
01056       memset(Msg.address,'\0',DNX_MAX_ADDRESS);
01057 
01058       // wait 1 second for a request; process the request, if valid
01059       if ((ret = dnxWaitForMgmtRequest(s_agent, &Msg, Msg.address, 1)) == DNX_OK)
01060       {
01061          DnxMgmtReply Rsp;
01062          char addrstr[DNX_MAX_ADDRSTR];
01063 
01064          dnxDebug(3,"processCommand:  Received MgmtRequest from %s",
01065                dnxNtop(Msg.address, addrstr, sizeof addrstr));
01066 
01067          // setup some default response values
01068          Rsp.xid = Msg.xid;
01069          Rsp.status = DNX_REQ_ACK;
01070          Rsp.reply = 0;
01071 
01072          // perform the requested action
01073          if (!strcmp(Msg.action, "SHUTDOWN"))
01074          {
01075             Rsp.reply = xstrdup("OK");
01076             s_shutdown = 1;
01077          }
01078          else if (!strcmp(Msg.action, "RECONFIGURE"))
01079          {
01080             s_reconfig = 1;
01081             Rsp.reply = xstrdup("OK");
01082          }
01083          else if (!strcmp(Msg.action, "DEBUGTOGGLE"))
01084          {
01085             s_debugsig = 1;
01086             Rsp.reply = xstrdup("OK");
01087          }
01088          else if (!strcmp(Msg.action, "RESETSTATS"))
01089          {
01090             dnxWlmResetStats(s_wlm);
01091 //            dnxComStatReset();
01092             Rsp.reply = xstrdup("OK");
01093          }
01094          if (!memcmp(Msg.action, "GETSTATS ", 9))
01095          {
01096             if ((Rsp.reply = buildMgmtStatsReply(Msg.action + 9)) == 0)
01097                Rsp.status = DNX_REQ_NAK;
01098          }
01099          else if (!strcmp(Msg.action, "GETCONFIG"))
01100          {
01101             if ((Rsp.reply = buildMgmtCfgReply()) == 0)
01102                Rsp.status = DNX_REQ_NAK;
01103          }
01104          else if (!strcmp(Msg.action, "GETVERSION"))
01105          {
01106             if ((Rsp.reply = versionText(s_progname)) == 0)
01107                Rsp.status = DNX_REQ_NAK;
01108          }
01109          else if (!strcmp(Msg.action, "HELP"))
01110          {
01111             if ((Rsp.reply = buildHelpReply()) == 0)
01112                Rsp.status = DNX_REQ_NAK;
01113          }
01114 
01115          // send response, log response failures
01116          if ((ret = dnxSendMgmtReply(s_agent, &Rsp, Msg.address)) != 0)
01117             dnxLog("Agent response failure: %s.", dnxErrorString(ret));
01118 
01119          // free request and reply message buffers
01120          xfree(Rsp.reply);
01121          xfree(Msg.action);
01122 
01123       }
01124       else if (ret != DNX_ERR_TIMEOUT)
01125          dnxLog("Agent channel failure: %s.", dnxErrorString(ret));
01126 
01127       if (s_reconfig)
01128       {
01129          DnxCfgData * old;
01130 
01131          dnxLog("Agent received RECONFIGURE request. Reconfiguring...");
01132 
01133          // reparse config file into temporary cfg structure and validate
01134          old = copyCfgData(&s_cfg);
01135          if ((ret = dnxCfgParserParse(s_parser, 0)) == 0)
01136             logGblConfigChanges(old, &s_cfg);
01137          if (old) freeCfgData(old);
01138          dnxLog("Reconfiguration: %s.", dnxErrorString(ret));
01139          s_reconfig = 0;
01140       }
01141       if (s_debugsig)
01142       {
01143          s_dbgflag ^= 1;
01144          dnxLog("Agent: Received DEBUGTOGGLE request. Debugging is %s.",
01145                s_dbgflag? "ENABLED" : "DISABLED");
01146          s_debugsig = 0;
01147       }
01148       if (s_shutdown)
01149       {
01150          dnxLog("Agent: Received SHUTDOWN request. Terminating...");
01151          break;
01152       }
01153    }
01154    if (ret == DNX_ERR_TIMEOUT)   // timeout is ok
01155       ret = 0;
01156    return ret;
01157 }
01158 
01159 //----------------------------------------------------------------------------
01160 
01169 int main(int argc, char ** argv)
01170 {
01171    int ret;
01172 
01173 //   gTopDCS = dnxComStatCreateDCS("127.0.0.1");
01174 
01175    // parse command line options; read configuration file
01176    if ((ret = getOptions(argc, argv)) != DNX_OK
01177          || (ret = initConfig(s_cfgfile)) != DNX_OK)
01178       goto e0;
01179 
01180    // initialize the logging subsystem with configured settings
01181    dnxLogInit(s_cfg.logFilePath, s_cfg.debugFilePath, 0, &s_cfg.debugLevel);
01182 
01183    dnxLog("-------- DNX Client Daemon Version %s Startup --------", VERSION);
01184    dnxLog("Copyright (c) 2006-2010 Intellectual Reserve. All rights reserved.");
01185    dnxLog("Configuration file: %s.", s_cfgfile);
01186    dnxLog("Dispatcher: %s.", s_cfg.wlm.dispatcherUrl);
01187    dnxLog("Collector: %s.", s_cfg.wlm.collectorUrl);
01188    dnxLog("Agent: %s.", s_cfg.agentUrl);
01189    if (s_cfg.debugFilePath && s_cfg.debugLevel != 0)
01190    {
01191       dnxLog("Debug logging enabled at level %d to %s.",
01192             s_cfg.debugLevel, s_cfg.debugFilePath);
01193 #if DEBUG_HEAP
01194       dnxLog("Debug heap is enabled.");
01195 #endif
01196 #if DEBUG_LOCKS
01197       dnxLog("Debug locks are enabled.");
01198 #endif
01199    }
01200 
01201    // load dynamic plugin modules (e.g., nrpe, snmp, etc.)
01202    if ((ret = dnxPluginInit(s_cfg.pluginPath)) != DNX_OK)
01203    {
01204       dnxLog("Plugin init failed: %s.", dnxErrorString(ret));
01205       goto e1;
01206    }
01207 
01208    // install signal handlers
01209    signal(SIGHUP,  sighandler);
01210    signal(SIGINT,  sighandler);
01211    signal(SIGQUIT, sighandler);
01212    signal(SIGABRT, sighandler);
01213    signal(SIGTERM, sighandler);
01214    signal(SIGPIPE, SIG_IGN);
01215    signal(SIGALRM, SIG_IGN);
01216    signal(SIGUSR1, sighandler);
01217    signal(SIGUSR2, SIG_IGN);
01218 
01219    // daemonize if not running in debug mode
01220    if (!s_dbgflag && (ret = daemonize(s_progname)) != 0)
01221       goto e2;
01222 
01223    // drop privileges as per configuration
01224    if ((ret = dropPrivileges()) != 0)
01225       goto e2;
01226 
01227    // create pid file if not running in debug mode
01228    if (!s_dbgflag && (ret = createPidFile(s_progname)) != 0)
01229       goto e2;
01230 
01231    // initialize the communications stack
01232    if ((ret = initClientComm()) != DNX_OK)
01233    {
01234       dnxLog("Communications init failed: %s.", dnxErrorString(ret));
01235       goto e3;
01236    }
01237 
01238    if ((ret = dnxWlmCreate(&s_cfg.wlm, &s_wlm)) != 0)
01239    {
01240       dnxLog("Thread pool init failed: %s.", dnxErrorString(ret));
01241       goto e4;
01242    }
01243 
01244    //----------------------------------------------------------------------
01245    ret = processCommands();
01246    //----------------------------------------------------------------------
01247 
01248    dnxDebug(1, "Command-loop exited: %s.", dnxErrorString(ret));
01249 
01250    dnxWlmDestroy(s_wlm);
01251 e4:releaseClientComm();
01252 e3:removePidFile(s_progname);
01253 e2:dnxPluginRelease();
01254 e1:releaseConfig();
01255 e0:dnxLog("-------- DNX Client Daemon Shutdown Complete --------");
01256 
01257 //   dnxComStatDestroy();
01258 
01259    xheapchk();    // works when debug heap is compiled in
01260    closelog();
01261 
01262    return ret;
01263 }
01264 
01265 /*--------------------------------------------------------------------------*/
01266 

Generated on Tue Apr 13 15:48:07 2010 for DNX by  doxygen 1.5.6