dnxNebMain.c File Reference

Implements the split-server version of the DNX Nagios plugin. More...

#include "dnxNebMain.h"
#include "dnxCfgParser.h"
#include "dnxError.h"
#include "dnxDebug.h"
#include "nagios.h"
#include "objects.h"
#include "nebmodules.h"
#include "nebstructs.h"
#include "nebcallbacks.h"
#include "neberrors.h"
#include "broker.h"
#include <unistd.h>
#include <signal.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <regex.h>

Go to the source code of this file.

Data Structures

struct  DnxCfgData
 The internal server module configuration data structure. More...

Defines

#define VERSION   "<unknown>"
#define SYSCONFDIR   "/etc"
#define SYSLOGDIR   "/var/log"
#define LIBEXECDIR   "/usr/libexec"
#define elemcount(x)   (sizeof(x)/sizeof(*(x)))
#define DNX_DEFAULT_SERVER_CONFIG_FILE   SYSCONFDIR "/dnxServer.cfg"
#define DNX_DEFAULT_LOG   SYSLOGDIR "/dnxsrv.log"
#define DNX_DEFAULT_DBGLOG   SYSLOGDIR "/dnxsrv.dbg.log"
#define DNX_DEFAULT_SERVER   LIBEXECDIR "/dnxServer"
#define P0R   0
#define P0W   1
#define P1R   2
#define P1W   3
#define P2R   4
#define P2W   5
#define PMX   6

Functions

 NEB_API_VERSION (CURRENT_NEB_API_VERSION)
 specify event broker API version (required by Nagios).
static void releaseConfig (void)
 Cleanup the config file parser.
static int validateCfg (DnxCfgDict *dict, void **vptrs, void *passthru)
 Validate a configuration data structure in context.
static int initConfig (char *cfgfile)
 Read and parse the dnxServer configuration file.
static int nagiosGetServiceCount (void)
 Return the number of services configured in Nagios.
static int dnxPostResult (void *data, time_t start_time, unsigned delta, int early_timeout, int res_code, char *res_data)
static int GetMsgHeader (DnxMsgHeader *phdr)
 Retrieve and validate the next message's header.
static int ProcessResultTransferMsg (DnxResultTransfer *result)
 Transfer a result from the server and post it to Nagios.
static void * dnxResultsListener (void *arg)
 Results listener thread.
static int GetAckNak (void)
 Read a single ACK/NAK from the server.
static int AllocateNodeRequest (void)
 Request a client node request from the server.
static int TransferJobToServer (DnxJobTransfer *job)
 Transfer a job to the dnx server.
static int dnxPostNewJob (unsigned long serial, nebstruct_service_check_data *ds)
 Post a new job from Nagios to the dnx server.
static int ehSvcCheck (int event_type, void *data)
 Service Check Event Handler.
static int dnxCalculateJobListSize (void)
 Calculate the optimal size of the job list.
static int execServerProcess (void)
 Exec a server child process with comm channels on well-known descriptors.
static int terminateServerProcess (void)
 Terminate an existing server process.
static int ehProcessData (int event_type, void *data)
 Process Data Event Handler.
static int dnxPluginDeInit (void)
 Deinitialize the DNX Plugin.
static int dnxPluginInit (void)
 Initialize the DNX plugin.
static int launchScript (char *script)
 Launches an external command and waits for it to return a status code.
int nebmodule_deinit (int flags, int reason)
 The main NEB module deinitialization routine.
int nebmodule_init (int flags, char *args, nebmodule *handle)
 The main NEB module initialization routine.

Variables

static char * cfgfile
 The server configuration file.
static DnxCfgData cfg
 The server configuration parameters.
static DnxCfgParser * parser
 The system configuration parser.
static time_t start_time
 The module start time.
static void * myHandle
 Private NEB module handle.
static regex_t regEx
 Compiled regular expression structure.
static pid_t serverpid
 Server child process id.
static int outdatafd
 Pipe write channel to server.
static int inackfd
 Pipe ack/nak channel from server.
static int indatafd
 Pipe read channel from server.
static pthread_t rlthread
 Thread handle for results listener.


Detailed Description

Implements the split-server version of the DNX Nagios plugin.

This version of the DNX Nagios Event Broker (NEB) module plugin library only implements the most basic of services required as a Nagios plugin. It starts the DNX Server as a child process, then creates a thread to listen for response data from the server. Finally, it registers a service check handler which begins to receive event service notifications.

It intercepts all service checks and dispatches them to distributed worker nodes via the DNX server child process. Communication with the child process is managed through inherited pipe file handles dup'd to well-known file descriptor numbers.

Author:
John Calcote (dnx-devel@lists.sourceforge.net)
Attention:
Please submit patches to http://dnx.sourceforge.net

Definition in file dnxNebMain.c.


Define Documentation

#define DNX_DEFAULT_DBGLOG   SYSLOGDIR "/dnxsrv.dbg.log"

Definition at line 101 of file dnxNebMain.c.

#define DNX_DEFAULT_LOG   SYSLOGDIR "/dnxsrv.log"

Definition at line 100 of file dnxNebMain.c.

#define DNX_DEFAULT_SERVER   LIBEXECDIR "/dnxServer"

Definition at line 102 of file dnxNebMain.c.

Referenced by initConfig().

#define DNX_DEFAULT_SERVER_CONFIG_FILE   SYSCONFDIR "/dnxServer.cfg"

Definition at line 99 of file dnxNebMain.c.

Referenced by getOptions(), nebmodule_init(), and versionText().

#define elemcount ( x )     (sizeof(x)/sizeof(*(x)))

Definition at line 97 of file dnxNebMain.c.

#define LIBEXECDIR   "/usr/libexec"

Definition at line 86 of file dnxNebMain.c.

#define P0R   0

Referenced by execServerProcess().

#define P0W   1

Referenced by execServerProcess().

#define P1R   2

Referenced by execServerProcess().

#define P1W   3

Referenced by execServerProcess().

#define P2R   4

Referenced by execServerProcess().

#define P2W   5

Referenced by execServerProcess().

#define PMX   6

Referenced by execServerProcess().

#define SYSCONFDIR   "/etc"

Definition at line 78 of file dnxNebMain.c.

#define SYSLOGDIR   "/var/log"

Definition at line 82 of file dnxNebMain.c.

#define VERSION   "<unknown>"

Definition at line 74 of file dnxNebMain.c.


Function Documentation

static int AllocateNodeRequest ( void   )  [static]

Request a client node request from the server.

Once a node request allocation has been successfully requested, it must be followed (asap) by a job transfer because the server has one on hold and is now waiting for the job to be transferred. Additionally, a node request has a timeout, and too much delay will cause it to be replaced.

Returns:
Zero on success, or a non-zero error value.

Todo:
EPIPE means broken pipe; restart server.

Definition at line 939 of file dnxNebMain.c.

References DNX_MSG_RESERVE_NODEREQ, DNX_MSG_SIGNATURE, dnxDebug(), GetAckNak(), DnxMsgHeader::msgtype, outdatafd, DnxMsgHeader::signature, and DnxMsgHeader::structsz.

Referenced by dnxPostNewJob().

static int dnxCalculateJobListSize ( void   )  [static]

Calculate the optimal size of the job list.

Assumes the caller will actually use the returned value to allocate the job list. Based on this assumption, this routine logs messages indicating when various configuration overrides have taken effect.

Returns:
The calculated size of the job list.

Definition at line 1162 of file dnxNebMain.c.

References dnxLog(), and nagiosGetServiceCount().

Referenced by dnxServerInit(), and execServerProcess().

static int dnxPluginDeInit ( void   )  [static]

Deinitialize the DNX Plugin.

Returns:
Always returns zero.

Definition at line 1313 of file dnxNebMain.c.

References dnxLog(), ehProcessData(), ehSvcCheck(), indatafd, neb_deregister_callback(), rlthread, serverpid, and terminateServerProcess().

Referenced by ehProcessData(), and nebmodule_deinit().

static int dnxPluginInit ( void   )  [static]

Initialize the DNX plugin.

Returns:
Zero on success, or a non-zero error value.

Definition at line 1344 of file dnxNebMain.c.

References dnxErrorString(), dnxLog(), dnxResultsListener(), ehSvcCheck(), execServerProcess(), myHandle, neb_register_callback(), and rlthread.

static int dnxPostNewJob ( unsigned long  serial,
nebstruct_service_check_data *  ds 
) [static]

Post a new job from Nagios to the dnx server.

The payload of the job transfer structure is the address of the structure itself. This structure is allocated here, but freed when the results are collected and posted. If no result is posted, the structure address is lost.

Parameters:
[in] serial - the serial number of the new job.
[in] ds - a pointer to the nagios job that's being posted.
Returns:
Zero on success, or a non-zero error value.
Todo:
Track posted jobs and free them if associated result is lost.

Definition at line 1044 of file dnxNebMain.c.

References AllocateNodeRequest(), DNX_ERR_MEMORY, TransferJobToServer(), and xmalloc.

Referenced by ehSvcCheck(), and ProcessJobTransferMsg().

static int dnxPostResult ( void *  data,
time_t  start_time,
unsigned  delta,
int  early_timeout,
int  res_code,
char *  res_data 
) [static]

Todo:
Investigate the rationale for setting res_code this way.

Definition at line 701 of file dnxNebMain.c.

References DnxJobTransfer::chkopts, DnxJobTransfer::reschedule, DnxJobTransfer::schedule, DnxJobTransfer::svc, and xfree.

Referenced by dnxCollector(), dnxTimer(), and ProcessResultTransferMsg().

static void* dnxResultsListener ( void *  arg  )  [static]

Results listener thread.

Listens for the server to post results. Captures each result and posts it to the Nagios results queue.

Parameters:
[in] arg - the thread routine argument.
Returns:
the thread return value.

Definition at line 842 of file dnxNebMain.c.

References DNX_MSG_RESULT_TRANSFER, dnxLog(), GetMsgHeader(), DnxResultTransfer::hdr, DnxMsgHeader::msgtype, ProcessResultTransferMsg(), DnxMsgHeader::structsz, xfree, and xrealloc.

Referenced by dnxPluginInit().

static int ehProcessData ( int  event_type,
void *  data 
) [static]

Process Data Event Handler.

We actually register for service check events and start the child process server program in this event handler because Nagios starts NEB modules long before it's fully configured its service check list. By waiting to initialize the server in this event handler, we have access to the number of service checks that Nagios is configured to manage, and we can thus properly configure the DNX server child process with a properly sized job queue.

Parameters:
[in] event_type - the event regarding which we were called by Nagios.
[in] data - an opaque pointer to an event-specific data structure.
Returns:
Zero if all is okay, but we want nagios to handle this event; non-zero if there's a problem of some sort.

Definition at line 1426 of file dnxNebMain.c.

References dnxDebug(), dnxLog(), dnxPluginDeInit(), dnxPluginInit(), launchScript(), and DnxCfgData::syncScript.

Referenced by dnxPluginDeInit(), dnxServerDeInit(), and nebmodule_init().

static int ehSvcCheck ( int  event_type,
void *  data 
) [static]

Service Check Event Handler.

Nagios calls this routine multiple times for each service check. We only care about the INITIATE call, so we return OK for all others.

Parameters:
[in] event_type - the event type for which we're being called.
[in] data - an opaque pointer to nagios event-specific data.
Returns:
Zero if we want Nagios to handle the event; NEBERROR_CALLBACKOVERRIDE indicates that we want to handle the event ourselves; any other non-zero value represents an error.

Definition at line 1104 of file dnxNebMain.c.

References DNX_OK, dnxDebug(), dnxErrorString(), dnxPostNewJob(), DnxCfgData::localCheckPattern, and regEx.

Referenced by dnxPluginDeInit(), dnxPluginInit(), dnxServerDeInit(), and dnxServerInit().

static int execServerProcess ( void   )  [static]

Exec a server child process with comm channels on well-known descriptors.

The child process is an entirely different program, so we have to dup the pipe descriptors onto well-known descriptor numbers so the server will know where to find them. I considered using STDIO channels for this, but the server may want to use STDIO for other reasons. Too bad, because it's a perfect fit: STDIN = job xfer, STDOUT = ack/nak, and STDERR = results. If I can find a way to keep the user from configuring any logging on STDIO, I may yet convert over. The handle values are defined in the plugin main header file, so it would be simple to redefine those descriptor values.

Returns:
Zero on success or a non-zero error code.

Definition at line 1191 of file dnxNebMain.c.

References cfgfile, DNX_PLGFD, DNX_SAKFD, DNX_SRVFD, dnxCalculateJobListSize(), elemcount, inackfd, indatafd, outdatafd, P0R, P0W, P1R, P1W, P2R, P2W, PMX, DnxCfgData::serverPath, and serverpid.

Referenced by dnxPluginInit().

static int GetAckNak ( void   )  [static]

Read a single ACK/NAK from the server.

The ack/nak is actually a DNX error code (zero on success).

Returns:
an error, or the value of the ack/nak.

Todo:
EPIPE means broken pipe; restart server.

Definition at line 897 of file dnxNebMain.c.

References dnxDebug(), and inackfd.

Referenced by AllocateNodeRequest(), and TransferJobToServer().

static int GetMsgHeader ( DnxMsgHeader *  phdr  )  [static]

Retrieve and validate the next message's header.

Parameters:
[out] phdr - the header buffer to be filled.
Returns:
zero on success or a non-zero error code.

Todo:
EPIPE means broken pipe: restart child process

Definition at line 747 of file dnxNebMain.c.

References DNX_ERR_UNSUPPORTED, DNX_MAX_XFER_SIZE, DNX_MSG_SIGNATURE, dnxDebug(), indatafd, DnxMsgHeader::signature, and DnxMsgHeader::structsz.

Referenced by dnxResultsListener(), and processRequests().

static int initConfig ( char *  cfgfile  )  [static]

Read and parse the dnxServer configuration file.

Parameters:
[in] cfgfile - the configuration file to be read.
Returns:
Zero on success, or a non-zero error value.

Definition at line 205 of file dnxNebMain.c.

References DnxCfgData::auditFilePath, DnxCfgData::debugFilePath, DnxCfgData::debugLevel, DNX_CFG_FSPATH, DNX_CFG_STRING, DNX_CFG_UNSIGNED, DNX_DEFAULT_DBGLOG, DNX_DEFAULT_LOG, DNX_DEFAULT_SERVER, dnxCfgParserCreate(), dnxCfgParserDestroy(), dnxCfgParserParse(), DnxCfgData::localCheckPattern, DnxCfgData::logFilePath, regEx, DnxCfgData::serverPath, DnxCfgData::syncScript, and validateCfg().

static int launchScript ( char *  script  )  [static]

Launches an external command and waits for it to return a status code.

Parameters:
[in] script - the command line to be launched.
Returns:
Zero on success, or a non-zero error value.

Definition at line 1388 of file dnxNebMain.c.

References DNX_ERR_INVALID, DNX_OK, and dnxLog().

Referenced by ehProcessData().

static int nagiosGetServiceCount ( void   )  [static]

Return the number of services configured in Nagios.

This technique works fine as long as it's called after Nagios has had a chance to build the list. Since Nagios loads NEB modules before configuring itself, we can't call this routine until Nagios calls our ehProcessData handler.

Returns:
The number of services configured in Nagios.
Todo:
This routine should be in nagios code.

Definition at line 256 of file dnxNebMain.c.

References service_list.

Referenced by dnxCalculateJobListSize().

NEB_API_VERSION ( CURRENT_NEB_API_VERSION   ) 

specify event broker API version (required by Nagios).

int nebmodule_deinit ( int  flags,
int  reason 
)

The main NEB module deinitialization routine.

This function gets called when the module is unloaded by the event broker.

Parameters:
[in] flags - nagios NEB module flags - not used.
[in] reason - nagios reason code - not used.
Returns:
Always returns zero.

Definition at line 1478 of file dnxNebMain.c.

References dnxLog(), dnxPluginDeInit(), releaseConfig(), and xheapchk.

int nebmodule_init ( int  flags,
char *  args,
nebmodule *  handle 
)

The main NEB module initialization routine.

This function gets called when the module is loaded by the event broker.

Parameters:
[in] flags - module flags - not used
[in] args - module arguments. These come from the nagios configuration file, and are passed through to the module as it loads.
[in] handle - our module handle - passed from the OS to nagios as nagios loaded us.
Returns:
Zero on success, or a non-zero error value.

Definition at line 1504 of file dnxNebMain.c.

References DnxCfgData::auditFilePath, cfgfile, DnxCfgData::debugFilePath, DnxCfgData::debugLevel, DNX_DEFAULT_SERVER_CONFIG_FILE, dnxErrorString(), dnxLog(), dnxLogInit(), ehProcessData(), initConfig(), DnxCfgData::logFilePath, myHandle, neb_register_callback(), releaseConfig(), terminateServerProcess(), and VERSION.

static int ProcessResultTransferMsg ( DnxResultTransfer *  result  )  [static]

Transfer a result from the server and post it to Nagios.

Parameters:
[in,out] result - the current result structure pointer.
Returns:
zero on success or a non-zero error code.

Todo:
EPIPE means broken pipe: restart child process

Definition at line 794 of file dnxNebMain.c.

References DnxResultTransfer::delta, dnxDebug(), dnxLog(), dnxPostResult(), DnxResultTransfer::early_timeout, DnxResultTransfer::hdr, indatafd, DnxResultTransfer::payload, DnxResultTransfer::res_code, DnxResultTransfer::res_data, DnxResultTransfer::serial, DnxResultTransfer::start_time, and DnxMsgHeader::structsz.

Referenced by dnxResultsListener().

static void releaseConfig ( void   )  [static]

Cleanup the config file parser.

Definition at line 137 of file dnxNebMain.c.

References dnxCfgParserDestroy(), DnxCfgData::localCheckPattern, and regEx.

static int terminateServerProcess ( void   )  [static]

Terminate an existing server process.

Returns:
Zero on success or a non-zero error code.

Definition at line 1290 of file dnxNebMain.c.

References inackfd, outdatafd, and serverpid.

Referenced by dnxPluginDeInit(), and nebmodule_init().

static int TransferJobToServer ( DnxJobTransfer *  job  )  [static]

Transfer a job to the dnx server.

A job transfer request must be preceded by an allocate client node request.

The request is larger than an integer, so we assume it can take more than one pass, but the result is an int, so we hope we can read it in one pass.

Parameters:
[in] job - the job to be transferred to the server.
Returns:
zero on success or a non-zero error code.

Todo:
EPIPE means broken pipe; restart server.

Definition at line 990 of file dnxNebMain.c.

References DNX_MSG_JOB_TRANSFER, DNX_MSG_SIGNATURE, dnxDebug(), GetAckNak(), DnxJobTransfer::hdr, DnxMsgHeader::msgtype, outdatafd, DnxMsgHeader::signature, and DnxMsgHeader::structsz.

Referenced by dnxPostNewJob().

static int validateCfg ( DnxCfgDict *  dict,
void **  vptrs,
void *  passthru 
) [static]

Validate a configuration data structure in context.

Parameters:
[in] dict - the dictionary used by the DnxCfgParser.
[in] vptrs - an array of opaque objects (either pointers or values) to be checked.
[in] passthru - an opaque pointer passed through from dnxCfgParserCreate. In this routine, it's the regex_t object into which we should parse the regular expression if one is given.
Returns:
Zero on success, or a non-zero error value. This error value is passed back through dnxCfgParserParse.

Definition at line 159 of file dnxNebMain.c.

References DnxCfgData::auditFilePath, DnxCfgData::debugFilePath, DnxCfgData::debugLevel, DNX_ERR_INVALID, dnxLog(), DnxCfgData::localCheckPattern, DnxCfgData::logFilePath, DnxCfgData::serverPath, and DnxCfgData::syncScript.


Variable Documentation

DnxCfgData cfg [static]

The server configuration parameters.

Definition at line 121 of file dnxNebMain.c.

Referenced by buildCurrentCfgCache(), formatBool(), formatIntOrUnsigned(), formatIntOrUnsignedArray(), formatString(), formatStringArray(), and validateCfg().

char* cfgfile [static]

The server configuration file.

Definition at line 120 of file dnxNebMain.c.

Referenced by execServerProcess(), and nebmodule_init().

int inackfd [static]

Pipe ack/nak channel from server.

Definition at line 128 of file dnxNebMain.c.

Referenced by execServerProcess(), GetAckNak(), and terminateServerProcess().

int indatafd [static]

Pipe read channel from server.

Definition at line 129 of file dnxNebMain.c.

Referenced by dnxPluginDeInit(), execServerProcess(), GetMsgHeader(), and ProcessResultTransferMsg().

void* myHandle [static]

Private NEB module handle.

Definition at line 124 of file dnxNebMain.c.

Referenced by dnxPluginInit(), dnxServerInit(), and nebmodule_init().

int outdatafd [static]

Pipe write channel to server.

Definition at line 127 of file dnxNebMain.c.

Referenced by AllocateNodeRequest(), execServerProcess(), terminateServerProcess(), and TransferJobToServer().

DnxCfgParser* parser [static]

The system configuration parser.

Definition at line 122 of file dnxNebMain.c.

regex_t regEx [static]

Compiled regular expression structure.

Definition at line 125 of file dnxNebMain.c.

Referenced by ehSvcCheck(), initConfig(), and releaseConfig().

pthread_t rlthread [static]

Thread handle for results listener.

Definition at line 130 of file dnxNebMain.c.

Referenced by dnxPluginDeInit(), and dnxPluginInit().

pid_t serverpid [static]

Server child process id.

Definition at line 126 of file dnxNebMain.c.

Referenced by dnxPluginDeInit(), execServerProcess(), and terminateServerProcess().

time_t start_time [static]

The module start time.

Definition at line 123 of file dnxNebMain.c.

Referenced by dnxPluginExternal(), and main().


Generated on Tue Apr 13 15:48:08 2010 for DNX by  doxygen 1.5.6