/*
  Copyright Mission Critical Linux, 2000

  Kimberlite is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
  Free Software Foundation; either version 2, or (at your option) any
  later version.

  Kimberlite is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with Kimberlite; see the file COPYING.  If not, write to the
  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
  MA 02139, USA.
 *
 * Author: Gregory P. Myrdal <Myrdal@MissionCriticalLinux.Com>
 *
 * svcState.c
 *
 *
 * This module contains library functions for services.  In general
 * it defines the functions that are needed to access shared service
 * information and perform service actions (start, stop, disable).
 */

/*
 * Version string that is filled in by CVS (the $Revision$ keyword below).
 * It is marked unused so that the compiler does not warn about it; none of
 * the code visible in this file reads it.
 */
static const char *version __attribute__ ((unused)) = "$Revision: 1.18 $";

/*
 * System includes
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/syslog.h>

/*
 * Cluster includes
 */
#include <clusterdefs.h>
#include <parseconf.h>
#include <svcmgr.h>
#include <logger.h>
#include <clu_lock.h>
#include <clucfg.h>

/*
 * Global variables
 *
 * Both objects below have external linkage.  Neither is referenced by the
 * code visible in this file.
 */
int lockCnt=0;				// locks outstanding (max should be 1)

/*
 * NOTE(review): execScript() in this file passes only the service ID to
 * the action script, so this table is presumably consumed elsewhere and
 * indexed by an action enumeration -- confirm the ordering against it
 * before adding or reordering entries.
 */
char *serviceActionStrings[] = {	// strings to be sent to action scripts
  "start",
  "stop",
  "disable",
  "waitandstart",
  "waitandstop",
  "donothing"
};

/*
 * Externally defined variables
 */
extern int listen_fd;			// listen file descriptor; closed in forked children by closeChildFDs()

/*
 * Externally defined functions
 *
 * How this file uses them:
 *   getServiceStatus()     - a return of 0 is treated as "record read".
 *   setServiceStatus()     - a return of 0 is treated as "record written".
 *   lockRead()             - declared here but not called by the code
 *                            visible in this file.
 *   cluGetDiskNodeStates() - a return of 0 is treated as success; the
 *                            states[] member is then indexed by node ID.
 *   removeService()        - a return of 0 is treated as success.
 *   cluster_member_check() - a return of 1 is required before a service
 *                            may be enabled.
 */
extern int getServiceStatus(int svcID, ServiceBlock *svcStatus);
extern int setServiceStatus(ServiceBlock *svcStatus);
extern int lockRead(int nodeNum, DiskLockBlock *lock_block);
extern int cluGetDiskNodeStates(SharedDiskNodeStates *nodeStates);
extern int removeService(int svcNum);
extern int cluster_member_check(void);

/*
 * Forward declarations of local functions
 */
static int execScript(char *script, int svcID);
 

/*****************************************************************************
 * Service status functions
 *****************************************************************************/

/*
 * reqServiceStatus
 *
 * Read the shared status record of service svcID into *svcStatusPtr.
 *
 * Returns SUCCESS when getServiceStatus() reports 0.  Otherwise an error
 * naming the service is logged and FAIL is returned.
 *
 * Note: This function does no locking of its own.  The caller MUST
 * bracket the call with clu_lock() and clu_un_lock(), because the record
 * is shared between nodes and only one node may touch it at a time.
 */
int
reqServiceStatus(int svcID, ServiceBlock *svcStatusPtr)
{
	char *name;
	int rc;

	/* The name is looked up first; it is only used by the error log. */
	getSvcName(svcID, &name);

	rc = getServiceStatus(svcID, svcStatusPtr);
	if (rc == 0) {
		return(SUCCESS);
	}

	clulog(LOG_ERR, "Cannot get service status for service %s\n", name);
	return(FAIL);
}


/*
 * reqServiceStatusChange
 *
 * Write the service status in *svcStatusPtr to the shared service
 * information.  The new state is logged BEFORE the write is attempted:
 * at LOG_ALERT for SVC_ERROR, at LOG_NOTICE for SVC_RUNNING, SVC_STOPPED
 * and SVC_DISABLED, and at LOG_INFO for every other state.
 *
 * Returns SUCCESS when setServiceStatus() reports 0.  Otherwise an error
 * is logged and FAIL is returned.
 *
 * Note: This function does no locking of its own.  The caller MUST
 * bracket the call with clu_lock() and clu_un_lock(), because the record
 * is shared between nodes and only one node may change it at a time.
 */
int
reqServiceStatusChange(ServiceBlock *svcStatusPtr)
{
	char *name;
	int level;

	getSvcName(svcStatusPtr->id, &name);

	if (svcStatusPtr->state == SVC_ERROR) {
		/* The error state gets its own wording and the highest priority. */
		clulog(LOG_ALERT, "Service %s is in %s state\n",
		       name, serviceStateStrings[svcStatusPtr->state]);
	} else {
		/* Settled states are more interesting than transitional ones. */
		switch (svcStatusPtr->state) {
		case SVC_RUNNING:
		case SVC_STOPPED:
		case SVC_DISABLED:
			level = LOG_NOTICE;
			break;
		default:
			level = LOG_INFO;
			break;
		}
		clulog(level, "Service %s is %s\n",
		       name, serviceStateStrings[svcStatusPtr->state]);
	}

	if (setServiceStatus(svcStatusPtr) == 0) {
		return(SUCCESS);
	}

	clulog(LOG_ERR, "Cannot update service %s to state %s\n",
	       name, serviceStateStrings[svcStatusPtr->state]);
	return(FAIL);
}

/*
 * lockAndReqServiceStatus
 *
 * Convenience wrapper: take the cluster lock, read the shared status of
 * service svcID into *svcStatusPtr with reqServiceStatus(), and release
 * the lock again whether or not the read worked.
 *
 * Returns SUCCESS if the read succeeded, FAIL otherwise.
 */
int
lockAndReqServiceStatus(int svcID, ServiceBlock *svcStatusPtr)
{
	int rc;

	clu_lock();
	rc = reqServiceStatus(svcID, svcStatusPtr);
	clu_un_lock();		/* released on both outcomes */

	return((rc == SUCCESS) ? SUCCESS : FAIL);
}

/*
 * lockAndReqServiceStatusChange
 *
 * Convenience wrapper: take the cluster lock, write *svcStatusPtr to the
 * shared service information with reqServiceStatusChange(), and release
 * the lock again whether or not the write worked.
 *
 * Returns SUCCESS if the write succeeded, FAIL otherwise.
 */
int
lockAndReqServiceStatusChange(ServiceBlock *svcStatusPtr)
{
	int rc;

	clu_lock();
	rc = reqServiceStatusChange(svcStatusPtr);
	clu_un_lock();		/* released on both outcomes */

	return((rc == SUCCESS) ? SUCCESS : FAIL);
}

/*
 * isServiceStateDisabled
 *
 * Determine if a service is listed as being disabled in the shared
 * service information.  This check is needed so administrative UI's
 * can determine if they can modify a service.  Return values are
 * YES if it is disabled, NO if it is not and FAIL if there was an
 * error.
 */
int
isServiceStateDisabled(int svcID, char *errMsg)
{
	ServiceBlock svcStatus;
	char *svcName;

	getSvcName(svcID, &svcName);

	if (reqServiceStatus(svcID, &svcStatus))
	  {
	    sprintf(errMsg, "Cannot get service status for %s\n", svcName);
	    return(FAIL);
	  }

	switch (svcStatus.state)
	  {
	  case SVC_DISABLED:
	    return(YES);

	  case SVC_STOPPED:
	  case SVC_ERROR:
	  case SVC_DISABLING:
	  case SVC_STARTING:
	  case SVC_STOPPING:
	  case SVC_RUNNING:
	    return(NO);

	  case SVC_UNINITIALIZED:
	  default:
	     sprintf(errMsg, "Cannot determine if service %s is disabled\n", 
	            svcName);
	     clulog(LOG_ERR, "%s", errMsg);
	     return(FAIL);
	  }

	sprintf(errMsg,"Cannot determine if service state for %s\n", svcName);
	clulog(LOG_ERR, "%s", errMsg);
	return(FAIL);	// should never get here
}

/*****************************************************************************
 * Functions that change the state of services
 *****************************************************************************/

/*
 * initServiceStatus
 *
 * Create the initial shared status record for service svcID.  The record
 * has no owner (NODE_ID_NONE) and its state follows isServiceDisabled():
 * YES gives SVC_DISABLED, NO gives SVC_STOPPED, and anything else is
 * logged and gives SVC_ERROR.
 *
 * If the record cannot be written, the service's shared information is
 * removed again with removeService() and FAIL is returned.  errMsg is
 * only filled in when that removal fails as well.
 *
 * Returns SUCCESS once the record has been written.
 *
 * Note: It is assumed that the caller locked the service information
 * block before calling this function.
 */
int
initServiceStatus(int svcID, char *errMsg)
{
	ServiceBlock newStatus;
	char *name;
	int disabled;

	newStatus.id = svcID;
	newStatus.owner = NODE_ID_NONE;

	getSvcName(svcID, &name);

	disabled = isServiceDisabled(svcID);
	if (disabled == YES) {
		newStatus.state = SVC_DISABLED;
	} else if (disabled == NO) {
		newStatus.state = SVC_STOPPED;
	} else {
		/* FAIL or any unexpected answer */
		clulog(LOG_ERR, "Cannot initialize service state for %s\n",
		       name);
		newStatus.state = SVC_ERROR;
	}

	if (reqServiceStatusChange(&newStatus) == SUCCESS) {
		return(SUCCESS);
	}

	/* The write failed: do not leave a half-created entry behind. */
	if (removeService(svcID) != 0) {
		sprintf(errMsg,
			"Cannot remove shared service information for %s\n",
			name);
		clulog(LOG_ERR, "%s", errMsg);
	}

	return(FAIL);
}

/*
 * reqEnableService()
 *
 * Request the enable (start) of a service.  If the service start fails
 * attempt to stop it so that it is not partially configured on this system.
 * If stopping the service fails we do not know what might be configured
 * on the system so we put it in SVC_ERROR state for user intervention.
 *
 * This request is initiated by the user.
 *
 * Outline:
 *   1. Refuse unless cluster_member_check() returns 1.
 *   2. Under the cluster lock, read the current status, decide whether
 *      the current state allows an enable, and write SVC_STARTING with
 *      this node as owner.
 *   3. With the lock released, run startService().
 *   4. On success write SVC_RUNNING.  On failure write SVC_DISABLING,
 *      run stopService(), and write a final state chosen from the state
 *      the service was in before the request.
 *
 * Parameters:
 *   svcID  - ID of the service to enable.
 *   errMsg - caller supplied buffer; filled with sprintf() on most, but
 *            not all, failure paths.
 *
 * Returns SUCCESS only when the service was started and SVC_RUNNING was
 * written; every other path returns FAIL.
 */
int
reqEnableService(int svcID, char *errMsg)
{
	ServiceBlock svcStatus;
	ServiceBlock origStatus;
	char *svcName;
	char *svcOwnerName;
	SharedDiskNodeStates nodeStates;
	int myNodeID=cluGetLocalNodeId();

	getSvcName(svcID, &svcName);

	/*
	 * Make sure we are a cluster member, if not we can not
	 * enable services.  (This message is returned in errMsg only;
	 * it is not logged here.)
	 */
	if (cluster_member_check() != 1)
	  {
	    sprintf(errMsg, "Cannot enable service %s; not a cluster member\n",
	            svcName);
	    return(FAIL);
	  }

	/*
	 * The lock is held from here until the SVC_STARTING record has
	 * been written; every early return below releases it first.
	 */
	clu_lock();

	if (reqServiceStatus(svcID, &svcStatus) != SUCCESS)
	  {
	    sprintf(errMsg,
"Cannot get service status; cannot enable service %s\n", svcName);
	    clulog(LOG_ERR, "%s", errMsg);
	    clu_un_lock();
	    return(FAIL);
	  }

	/*
	 * Remember the status as it was before this request.  Only
	 * origStatus.state is consulted later, on the failed-start path.
	 */
	origStatus.id = svcStatus.id;
	origStatus.owner = svcStatus.owner;
	origStatus.state = svcStatus.state;

	switch (svcStatus.state)
	  {
	  case SVC_STOPPED:	// its ok to enable services stopped/disabled
	  case SVC_DISABLED:
	    break;

	  /*
	   * Its ok to enable a service in the error state if this member
	   * owns the service or the owning member is down.
	   *
	   * NOTE(review): svcStatus.owner is used as an index into
	   * nodeStates.states[] without a range check.  initServiceStatus()
	   * stores NODE_ID_NONE as the owner, so confirm that value is a
	   * valid index.
	   */
	  case SVC_ERROR:	

	    if (cluGetDiskNodeStates(&nodeStates) != 0)
	      {
	        sprintf(errMsg,
"Cannot get node states, cannot determine if owner of %s is down\n",
	                svcName);
	        clulog(LOG_ERR, "%s", errMsg);
	        clu_un_lock();
	        return(FAIL);
	      }


	    if ((myNodeID != svcStatus.owner) &&
	        (nodeStates.states[svcStatus.owner] != NODE_DOWN))
	      {
	        getNodeName(svcStatus.owner, &svcOwnerName);
	        sprintf(errMsg, 
"Service %s is in error state, it can only be enabled on %s\n",
	                svcName, svcOwnerName);
	        clulog(LOG_ERR, "%s", errMsg);
	        clu_un_lock();
	        return(FAIL);
	      }
	    break;

	  case SVC_STARTING:
	  case SVC_STOPPING:
	  case SVC_RUNNING:
	    /*
	     * If the owning node is down its ok to enable the service.
	     * If the node states cannot be read at all, the request is
	     * refused just as if the owner were up.
	     */
	    if (cluGetDiskNodeStates(&nodeStates) == 0)
	      {
	        if (nodeStates.states[svcStatus.owner] == NODE_DOWN)
	            break;
	      }

	    sprintf(errMsg,
"Cannot enable a service that is in the %s state\n",
	            serviceStateStrings[svcStatus.state]);
	    clulog(LOG_ERR, "%s", errMsg);
	    clu_un_lock();
	    return(FAIL);

	  /*
	   * Every state not listed above (SVC_DISABLING and
	   * SVC_UNINITIALIZED have no case of their own) is allowed
	   * through to the enable attempt.
	   */
	  default:
	    break;
	  }

	/* Claim the service for this node and mark it as starting. */
	svcStatus.id=svcID;
	svcStatus.owner=myNodeID;
	svcStatus.state=SVC_STARTING;

	/* errMsg is not filled in on this failure path. */
	if (reqServiceStatusChange(&svcStatus) != SUCCESS)
	  {
	    clu_un_lock();
	    return(FAIL);
	  }

	/* The start script runs without the cluster lock held. */
	clu_un_lock();

	if ((startService(svcID)) != SUCCESS)
	  {
	    /*
	     * Start failed: mark the service as disabling while the
	     * stop script undoes whatever the start managed to set up.
	     */
	    svcStatus.state = SVC_DISABLING;
	    if (lockAndReqServiceStatusChange(&svcStatus) != SUCCESS)
	        return(FAIL);

	    if ((stopService(svcID)) != SUCCESS)
	      {
	        /* Owner stays set to this node for the error state. */
	        sprintf(errMsg, "Failed to stop service after failed start\n");
	        clulog(LOG_ERR,  "%s", errMsg);
	        svcStatus.state = SVC_ERROR;
	      }
	    else
	      {
	        /*
	         * The stop worked; pick the final state from the state
	         * the service was in before this request.
	         */
	        switch (origStatus.state)
	          {
	          case SVC_ERROR:
	            /*
	             * Looks like we could successfully stop the service so
	             * we can remove it from the error state.  Look in the
	             * database and put it into the state defined by the user.
	             * Ie. disabled or not.
	             */
	            svcStatus.owner = NODE_ID_NONE;
	            switch (isServiceDisabled(svcID))
	              {
	                case YES:
	                  svcStatus.state = SVC_DISABLED;
	                  break;
	                case NO:
	                  svcStatus.state = SVC_STOPPED;
	                  break;
	                case FAIL:
	                default:
	                  clulog(LOG_ERR, 
"Cannot determine if service %s is disabled; assigning it to stopped state\n", 
	                         svcName);
	                  svcStatus.state = SVC_STOPPED;
	                  break;
	              }
	              break;

	          case SVC_DISABLED:
	          case SVC_DISABLING:
	            svcStatus.state = SVC_DISABLED;
	            svcStatus.owner = NODE_ID_NONE;
	            break;

	          case SVC_STOPPED:
	          case SVC_RUNNING:
	          case SVC_STOPPING:
	          case SVC_STARTING:
	            svcStatus.state = SVC_STOPPED;
	            svcStatus.owner = NODE_ID_NONE;
	            break;

	          /*
	           * NOTE(review): this return skips the final status
	           * write below, so the shared record keeps the
	           * SVC_DISABLING state and local owner written above.
	           * Confirm that this is intended.
	           */
	          default:
	            sprintf(errMsg,
"reqEnableService: Unknown service state '%d'\n", origStatus.state);
	            clulog(LOG_ERR, "%s", errMsg);
	            return(FAIL);
	          }
	      }

	    /*
	     * The enable itself failed, so FAIL is returned whether or
	     * not this final status write succeeds.
	     */
	    if (lockAndReqServiceStatusChange(&svcStatus) != SUCCESS)
	        return(FAIL);

	    return(FAIL);
	  }

	/* Start succeeded: publish the running state. */
	svcStatus.state = SVC_RUNNING;
	if (lockAndReqServiceStatusChange(&svcStatus) != SUCCESS)
	    return(FAIL);

	return(SUCCESS);
}

/*
 * reqDisableService()
 *
 * Request a service disable.  This is initiated by the user to stop the
 * service from running and keeping it from running by any member in the
 * cluster. 
 */
int
reqDisableService(int svcID, char *errMsg)
{
	ServiceBlock svcStatus;
	ServiceBlock origStatus;
	char *svcName;
	int stopNeeded=0;
	SharedDiskNodeStates nodeStates;
	int myNodeID=cluGetLocalNodeId();
	char *svcOwnerName=(char *)NULL;

	getSvcName(svcID, &svcName);

	clu_lock();

	if (reqServiceStatus(svcID, &svcStatus) != SUCCESS)
	  {
	    sprintf(errMsg, 
"Cannot get service status; cannot disable service %s\n", svcName);
	    clulog(LOG_ERR, "%s", errMsg);
	    clu_un_lock();
	    return(FAIL);
	  }

	origStatus.id = svcStatus.id;
	origStatus.owner = svcStatus.owner;
	origStatus.state = svcStatus.state;

	switch (svcStatus.state)
	  {
	  /*
	   * Note: it is assumed that the calling UI knows where the
	   * service is running, but we check here to make sure that
	   * we are running it before trying to stop.
	   */
	  case SVC_RUNNING:
	  case SVC_ERROR:
	    if (myNodeID != svcStatus.owner)
	      {
	        getNodeName(svcStatus.owner, &svcOwnerName);
	        sprintf(errMsg,
"Cannot disable service %s locally; service is owned by %s\n", 
	                svcName, svcOwnerName);
	        clulog(LOG_NOTICE, "%s", errMsg);
	        clu_un_lock();
	        return(FAIL);
	      }
	    stopNeeded=1;
	    break;

	  case SVC_STOPPED:	
	    break;

	  case SVC_STARTING:
	  case SVC_STOPPING:
	    /*
	     * If the owning node is down its ok to disable the service
	     */
	    if (cluGetDiskNodeStates(&nodeStates) == 0)
	      {
	        if (nodeStates.states[svcStatus.owner] == NODE_DOWN)
	            break;
	      }

	    sprintf(errMsg, 
"Cannot disable a service that is in the %s state\n",
	            serviceStateStrings[svcStatus.state]);
	    clulog(LOG_ERR, "%s", errMsg);
	    clu_un_lock();
	    return(FAIL);

	  case SVC_DISABLED:
	    sprintf(errMsg, "Service is already disabled\n");
	    clulog(LOG_NOTICE, "%s", errMsg);
	    clu_un_lock();
	    return(FAIL);

	  default:
	    break;
	  }

	svcStatus.owner = myNodeID;
	svcStatus.state = SVC_DISABLING;

	if (reqServiceStatusChange(&svcStatus) != SUCCESS)
	    return(FAIL);

	clu_un_lock();

	if (stopNeeded && ((stopService(svcID)) != SUCCESS))
	  {

	    if (origStatus.state == SVC_ERROR)
	      {
	        sprintf(errMsg,
"Cannot stop service %s; setting it back to error state.\n", svcName);
	        svcStatus.state = SVC_ERROR;
	        lockAndReqServiceStatusChange(&svcStatus);
	        return(FAIL);
	      }

	    svcStatus.state = SVC_STARTING;
	    if (lockAndReqServiceStatusChange(&svcStatus) != SUCCESS)
	        return(FAIL);

	    if ((startService(svcID)) != SUCCESS)
	      {
	        sprintf(errMsg,
"Failed to restart service after failed stop\n");
	        clulog(LOG_ERR, "%s", errMsg);
	        svcStatus.state = SVC_ERROR;
	        if (lockAndReqServiceStatusChange(&svcStatus) != SUCCESS)
	            return(FAIL);

	        return(FAIL);
	      }

	    svcStatus.state = SVC_RUNNING;
	    if (lockAndReqServiceStatusChange(&svcStatus) != SUCCESS)
	        return(FAIL);

	    return(FAIL);
	  }

	svcStatus.state = SVC_DISABLED;
	svcStatus.owner = NODE_ID_NONE;
	if (lockAndReqServiceStatusChange(&svcStatus) != SUCCESS)
	    return(FAIL);

	return(SUCCESS);
}

/*
 * addService()
 *
 * Request a service add.  This is initiated by the user to add a service
 * to the cluster.
 *
 * Under the cluster lock the function first checks that no shared status
 * record exists for svcID yet, then creates one with initServiceStatus().
 *
 * Parameters:
 *   svcID  - ID of the service to add.
 *   errMsg - caller supplied buffer that receives the failure message.
 *            Anything initServiceStatus() wrote into it is overwritten
 *            by the message produced here.
 *
 * Returns SUCCESS when the record was created, FAIL when it already
 * existed or could not be initialized.
 */
int
addService(int svcID, char *errMsg)
{
	ServiceBlock existing;
	char *name;
	int result = SUCCESS;

	getSvcName(svcID, &name);

	clu_lock();

	if (getServiceStatus(svcID, &existing) == 0) {
		/* A readable record means the service is already known. */
		sprintf(errMsg,
			"Service ID %d already exists in the shared state information\n",
			svcID);
		clulog(LOG_ERR, "%s", errMsg);
		result = FAIL;
	} else if (initServiceStatus(svcID, errMsg) != SUCCESS) {
		sprintf(errMsg,
			"Cannot initialize service status information for %s\n",
			name);
		clulog(LOG_ERR, "%s", errMsg);
		result = FAIL;
	}

	clu_un_lock();

	return(result);
}

/*
 * deleteService()
 *
 * Request a service delete.  This is initiated by the user to remove a
 * service from the cluster.
 *
 * Under the cluster lock the function checks whether a shared status
 * record exists for svcID.  A missing record is only logged; the removal
 * is attempted either way.
 *
 * Parameters:
 *   svcID  - ID of the service to delete.
 *   errMsg - caller supplied buffer that receives a message when
 *            removeService() fails.
 *
 * Returns SUCCESS when removeService() reports 0, FAIL otherwise.
 */
int
deleteService(int svcID, char *errMsg)
{
	ServiceBlock scratch;	/* only needed as an output argument */
	char *name;
	int result = SUCCESS;

	getSvcName(svcID, &name);

	clu_lock();

	if (getServiceStatus(svcID, &scratch) != 0) {
		clulog(LOG_ERR,
		       "Service ID %d does not exist, deleting anyway\n", svcID);
	}

	if (removeService(svcID) != 0) {
		sprintf(errMsg,
			"Cannot remove shared service information for %s\n",
			name);
		clulog(LOG_ERR, "%s", errMsg);
		result = FAIL;
	}

	clu_un_lock();

	return(result);
}

/*
 * startService()
 *
 * Start a service by running SVC_START_SCRIPT for it in a forked child
 * and waiting for that child to finish.
 *
 * Returns SUCCESS when the script's wait status is 0.  Returns FAIL when
 * the child could not be forked, could not be waited for, or finished
 * with a non-zero wait status (non-zero exit code or death by signal).
 */
int
startService(int svcID)
{
	int retVal=0, pid, reaped;
	char *svcName;

	getSvcName(svcID, &svcName);

	if ((pid=execScript(SVC_START_SCRIPT, svcID)) == FAIL)
	  {
	    clulog(LOG_ERR, 
"Cannot exec action script, cannot start service %s\n", svcName);
	    return(FAIL);
	  }

	clulog(LOG_DEBUG, 
		"Waiting on return of forked start process %d\n", pid);

	/*
	 * Wait for the script we forked, not for whichever child happens
	 * to exit first, and restart the wait if a signal interrupts it.
	 */
	do
	  {
	    reaped = waitpid(pid, &retVal, 0);
	  }
	while ((reaped == -1) && (errno == EINTR));

	/* Without a wait status the outcome is unknown; do not claim success. */
	if (reaped == -1)
	  {
	    clulog(LOG_ERR,
"Cannot wait for start process %d of service %s: %s\n",
	           pid, svcName, strerror(errno));
	    return(FAIL);
	  }

	clulog(LOG_DEBUG, 
		"Start service finished by return of process %d\n", reaped);
	if (retVal != 0)
	  {
	    clulog(LOG_ERR, 
			"Cannot start service %s, error = %d, child = %d\n",
	                     svcName, retVal, reaped);
	    return(FAIL);
	  }

	return(SUCCESS);
}

/*
 * stopService()
 *
 * Stop a service by running SVC_STOP_SCRIPT for it in a forked child and
 * waiting for that child to finish.
 *
 * Returns SUCCESS when the script's wait status is 0.  Returns FAIL when
 * the child could not be forked, could not be waited for, or finished
 * with a non-zero wait status (non-zero exit code or death by signal).
 */
int
stopService(int svcID)
{
	int retVal=0, pid, reaped;
	char *svcName;

	getSvcName(svcID, &svcName);

	if ((pid=execScript(SVC_STOP_SCRIPT, svcID)) == FAIL)
	  {
	    clulog(LOG_ERR, 
"Cannot exec action script, cannot stop service %s\n", svcName);
	    return(FAIL);
	  }

	clulog(LOG_DEBUG, 
		"Waiting on return of forked stop process %d\n", pid);

	/*
	 * Wait for the script we forked, not for whichever child happens
	 * to exit first, and restart the wait if a signal interrupts it.
	 */
	do
	  {
	    reaped = waitpid(pid, &retVal, 0);
	  }
	while ((reaped == -1) && (errno == EINTR));

	/* Without a wait status the outcome is unknown; do not claim success. */
	if (reaped == -1)
	  {
	    clulog(LOG_ERR,
"Cannot wait for stop process %d of service %s: %s\n",
	           pid, svcName, strerror(errno));
	    return(FAIL);
	  }

	clulog(LOG_DEBUG, 
		"Stop service finished by return of process %d\n", reaped);

	if (retVal != 0)
	  {
	    clulog(LOG_ERR, "Cannot stop service %s, error = %d\n",
	                     svcName, retVal);
	    return(FAIL);
	  }

	return(SUCCESS);
}

/*
 * disableService()
 *
 * Disable a service by running its action script in a forked child and
 * waiting for that child to finish.
 *
 * NOTE(review): the script run here is SVC_STOP_SCRIPT, the same one
 * stopService() uses -- confirm that no separate disable script is
 * intended.
 *
 * Returns SUCCESS when the script's wait status is 0.  Returns FAIL when
 * the child could not be forked, could not be waited for, or finished
 * with a non-zero wait status (non-zero exit code or death by signal).
 */
int
disableService(int svcID)
{
	int retVal=0, pid, reaped;
	char *svcName;

	getSvcName(svcID, &svcName);

	if ((pid=execScript(SVC_STOP_SCRIPT, svcID)) == FAIL)
	  {
	    clulog(LOG_ERR, 
"Cannot exec action script, cannot disable service %s\n", svcName);
	    return(FAIL);
	  }

	clulog(LOG_DEBUG, 
		"Waiting on return of forked disable process %d\n", pid);

	/*
	 * Wait for the script we forked, not for whichever child happens
	 * to exit first, and restart the wait if a signal interrupts it.
	 */
	do
	  {
	    reaped = waitpid(pid, &retVal, 0);
	  }
	while ((reaped == -1) && (errno == EINTR));

	/* Without a wait status the outcome is unknown; do not claim success. */
	if (reaped == -1)
	  {
	    clulog(LOG_ERR,
"Cannot wait for disable process %d of service %s: %s\n",
	           pid, svcName, strerror(errno));
	    return(FAIL);
	  }

	clulog(LOG_DEBUG, 
		"Disable service finished by return of process %d\n", reaped);
	if (retVal != 0)
	  {
	    clulog(LOG_ERR, "Cannot disable service %s, error = %d\n",
	            svcName, retVal);
	    return(FAIL);
	  }

	return(SUCCESS);
}

/*****************************************************************************
 * Misc support functions
 *****************************************************************************/

/*
 * closeChildFDs
 *
 * Release, in a forked child process, the Service Manager resources the
 * child inherited: the connection to the logger daemon and, when one is
 * open (listen_fd is not negative), the listen file descriptor.
 *
 * Always returns SUCCESS; the result of close() is not examined.
 *
 * Note: Do not call msg_close() as this will remove the descriptors from
 * the message service.
 */
int
closeChildFDs(void)
{
	/* Drop this process's connection to the loggerd. */
	clulog_close();

	/* Nothing more to do when there is no listen descriptor. */
	if (listen_fd < 0) {
		return(SUCCESS);
	}

	close(listen_fd);
	return(SUCCESS);
}

/*
 * execScript()
 *
 * Fork a child process and exec a start/stop/disable action script in
 * it, passing the service ID as the script's only argument.
 *
 * Parameters:
 *   script - path of the action script to exec.
 *   svcID  - ID of the service the script acts on.
 *
 * Returns, in the parent, the pid of the forked child, or FAIL if the
 * fork itself failed.  The caller is expected to wait for the child and
 * use its wait status to decide what to do next with the service.
 *
 * This function never returns in the child: the child either becomes the
 * script or, if the exec fails, logs the reason and exits with a
 * non-zero status.  Returning instead would leave a second process
 * executing the caller's service manager code.
 */
static int
execScript(char *script, int svcID)
{
	int pid;
	int savedErrno;
	char *script_argv[3];
	char svcIDstr[16];		// room for any 32-bit int, sign and NUL
	char *svcName;

	pid = fork();

	if (pid == -1)				// error
	  {
	    sprintf(errbuf, "fork failed: %s", strerror(errno));
	    clulog(LOG_ERR, "%s", errbuf);
	    return(FAIL);
	  }

	if (pid != 0)				// parent
	    return(pid);

	/*
	 * Child from here on.
	 */
	closeChildFDs();		// close any file descriptors

	getSvcName(svcID, &svcName);

	clulog(LOG_INFO, "Exec'ing service %s script '%s %d'\n", 
	       svcName, script, svcID);

	snprintf(svcIDstr, sizeof(svcIDstr), "%d", svcID);
	script_argv[0]=script;
	script_argv[1]=svcIDstr;
	script_argv[2]=(char *)NULL;

	execv(script, script_argv);

	/*
	 * execv() only returns on failure.  Save errno before anything
	 * else can overwrite it.
	 */
	savedErrno = errno;
	clulog(LOG_ERR, "Cannot exec service %s script '%s %d': %s\n", 
	       svcName, script, svcID, strerror(savedErrno));

	/*
	 * _exit() rather than exit(): this is a forked copy of the
	 * parent, so the parent's atexit handlers and stdio buffers must
	 * not be run or flushed a second time.  The non-zero status makes
	 * the waiting parent treat the action as failed.
	 */
	_exit(127);
}
