/*
  Copyright Mission Critical Linux, 2000

  Kimberlite is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
  Free Software Foundation; either version 2, or (at your option) any
  later version.

  Kimberlite is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with Kimberlite; see the file COPYING.  If not, write to the
  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
  MA 02139, USA.
*/
/*
 *  $Id: diskservices.c,v 1.8 2000/09/07 23:25:49 winchell Exp $
 *
 *  Copyright (C) 2000 Mission Critical Linux, LLC
 *
 *  author: Tim Burke <burke@missioncriticallinux.com>
 *  description: Interface to Service descriptions.
 *
 * diskservices.c
 *
 * This file implements the routines used to represent the set of 
 * services being served by a node.  Its main job is to control the
 * "ServiceBlock" structures on-disk.
 */
#include <stdio.h>
#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <signal.h>
#include <errno.h>
#include <clu_lock.h>

#include <logger.h>
#include <sys/syslog.h>
#include "diskstate.h"
#include "disk_proto.h"

/*
 * Forward routine declarations.
 *
 * initServiceBlock() and writeServiceBlock() are private to this file.
 * printServiceBlock() has external linkage; it is declared here because
 * readServiceBlock() calls it before its definition appears.
 */
static void initServiceBlock(DiskServiceBlock *svcblk, int svcNum);
static int writeServiceBlock(DiskServiceBlock *svcblk);
void printServiceBlock(DiskServiceBlock *svcblk);

/* Revision-control identification string; marked unused to silence warnings. */
static const char *version __attribute__ ((unused)) = "$Id: diskservices.c,v 1.8 2000/09/07 23:25:49 winchell Exp $";
/*
 * Non-zero once initServiceSubsys() has completed successfully; cleared
 * again by closeServiceSubsys().
 */
static int subsysInitialized = 0;
/*
 * .............Configurable Parameters...................
 *
 * The following tuning knobs are intended to allow customization.
 */
/*
 * We tolerate a few IO errors before reacting.
 * This parameter defines how many consecutive errors are needed
 * to declare a true IO failure condition.  It is intended to avoid
 * over-reacting to an intermittent error.
 *
 * In this file it bounds the number of read/write attempts made by
 * getServiceStatus(), setServiceStatus() and removeService() before they
 * call consider_shutdown().
 */
static int max_consecutive_io_errors = MAX_CONSECUTIVE_IO_ERRORS;

/*
 * Called to initialize subsystem state variables.
 * Sets up the aligned-buffer (raw I/O) support via initAlignedBufStuff()
 * and records that the subsystem is initialized.  Calling it again while
 * already initialized only logs a debug message.
 *
 * Returns: 0 on success (including the already-initialized case),
 *	   -1 if initAlignedBufStuff() reports failure.
 */
int initServiceSubsys(void) {

    if (subsysInitialized) {
	clulog(LOG_DEBUG, "initServiceSubsys: already initialized.\n");
	return(0);
    }

    if (initAlignedBufStuff() < 0) {
        clulog(LOG_EMERG, "initServiceSubsys: unable to init rawio support.\n");
        return(-1);
    }
    subsysInitialized = 1;
    return(0);
}

/*
 * Undo initServiceSubsys(): release the aligned-buffer support and mark
 * the subsystem as closed.  Closing a subsystem that is not open is not
 * an error; it is merely logged at debug level.
 *
 * Returns: always 0.
 */
int closeServiceSubsys(void) {
    if (!subsysInitialized) {
        clulog(LOG_DEBUG, "closeServiceSubsys: Subsystem not open.\n");
        return 0;
    }

    deinitAlignedBufStuff();
    subsysInitialized = 0;

    return 0;
}

/*
 * Reset a service block to its pristine, unused form.
 *
 * The whole structure is zeroed, then stamped with the magic number, the
 * latest block version, the given service number and the state
 * SVC_UNINITIALIZED.  That state marks an entry that does not correspond
 * to a service in the service description file (unused or deleted).
 * Other fields are filled in later during normal operation.
 *
 * svcblk - block to reset (must not be NULL).
 * svcNum - service number to record in the block.
 */
static void initServiceBlock(DiskServiceBlock *svcblk, int svcNum) {
    /* Start from all-zero so every field not set below is 0. */
    memset(svcblk, 0, sizeof(*svcblk));

    svcblk->svcblk.id = svcNum;
    svcblk->svcblk.state = SVC_UNINITIALIZED;
    svcblk->version = SERVICE_BLOCK_LATEST_VERSION;
    svcblk->magic_number = SERVICE_BLOCK_MAGIC_NUMBER;
    svcblk->pad = 0;
}

/*
 * Write the service block out to disk.
 *
 * The block is validated first (service number, state, owner, magic
 * number) so that an obviously bogus block never reaches the shared
 * partition.  The on-disk location is derived from the service number
 * stored in the block itself.  The cluster lock is asserted on entry.
 *
 * svcblk - fully populated block to write (must not be NULL).
 *
 * Returns: -2 on parameter error or subsystem init failure; otherwise the
 *	    result of diskRawWriteShadow() (presumably 0 on success and
 *	    -1 on IO error -- confirm against its definition).
 */
static int writeServiceBlock(DiskServiceBlock *svcblk) {
	off_t offsetService;
	int svcNum;

	assert_clu_lock_held("writeServiceBlock");
	svcNum = svcblk->svcblk.id;
	// Paranoia checks
	if ((svcNum < 0) || (svcNum >= MAX_SERVICES)) {
		clulog(LOG_ERR, "writeServiceBlock: Invalid service number %d.\n",
			svcNum);
		return(-2);
	}
	if (svcblk->svcblk.state < 0 || svcblk->svcblk.state > SVC_LAST_STATE) {
		clulog(LOG_ERR, "writeServiceBlock: Invalid state %d.\n",
			svcblk->svcblk.state);
		return(-2);
	}
	// An owner is either "none" or a valid node index.
	if ((svcblk->svcblk.owner != NODE_ID_NONE) &&
	    ((svcblk->svcblk.owner < 0) ||
	    (svcblk->svcblk.owner >= MAX_NODES))) {
		clulog(LOG_ERR, "writeServiceBlock: Invalid owner number %d.\n",
			svcblk->svcblk.owner);
		return(-2);
	}
	if (subsysInitialized == 0) {
		if (initServiceSubsys() != 0) {
			clulog(LOG_ERR, "writeServiceBlock: Subsystem init failure.\n");
			return(-2);
		}
	}
	if (svcblk->magic_number != SERVICE_BLOCK_MAGIC_NUMBER) {
		clulog(LOG_ERR, "writeServiceBlock: invalid magic# 0x%lx\n",
			svcblk->magic_number);
		return(-2);
	}
	offsetService = (OFFSET_FIRST_SERVICE_BLOCK +
			(svcNum * SPACE_PER_SERVICE_BLOCK));

	/*
	 * The last argument is the byte offset of the check_sum field within
	 * the block; offsetof() yields it without the null-pointer
	 * dereference idiom.
	 */
	return diskRawWriteShadow(offsetService, (char *)svcblk,
				  sizeof(DiskServiceBlock),
				  (ulong)offsetof(DiskServiceBlock, check_sum));
}

/*
 * Reads in the service block from the shared partition,
 * stuffing the results into the passed data struct.
 *
 * The destination is zeroed before anything else, so on every failure
 * path that occurs before the disk read it holds no stale data.  After
 * the read, the block must carry the expected magic number and the
 * requested service number to be accepted.
 *
 * svcNum - service number to read, 0 .. MAX_SERVICES-1.
 * svcblk - destination buffer (must not be NULL).
 *
 * Returns: 0 on success;
 *	   -2 on invalid service number, subsystem init failure, bad magic
 *	      number, or service number mismatch;
 *	   otherwise the non-zero result of diskRawReadShadow()
 *	      (presumably -1 on IO error -- confirm against its definition).
 */
static int readServiceBlock(int svcNum, DiskServiceBlock *svcblk) {
	off_t offsetService;
	int ret;

	bzero((void *)svcblk, sizeof(DiskServiceBlock)); // paranoia
	if ((svcNum < 0) || (svcNum >= MAX_SERVICES)) {
		clulog(LOG_ERR, "readServiceBlock: Invalid service number %d.\n",
			svcNum);
		return(-2);
	}
	if (subsysInitialized == 0) {
		if (initServiceSubsys() != 0) {
			clulog(LOG_ERR, "readServiceBlock: Subsystem init failure.\n");
			return(-2);
		}
	}
	offsetService = (OFFSET_FIRST_SERVICE_BLOCK +
			(svcNum * SPACE_PER_SERVICE_BLOCK));

	/*
	 * The fourth argument is the byte offset of the check_sum field,
	 * obtained with offsetof() rather than the null-pointer idiom.
	 * NOTE(review): the trailing 1 presumably asks for repair of a bad
	 * copy -- confirm against diskRawReadShadow().
	 */
	ret = diskRawReadShadow(offsetService, (char *)svcblk,
			sizeof(DiskServiceBlock),
			(ulong)offsetof(DiskServiceBlock, check_sum), 1);

	if (ret) {
		clulog(LOG_ERR, "readServiceBlock: bad ret %d from diskRawReadShadow\n", ret);
		return(ret);
	}
	/*
	 * Do at least a primitive level of validation to see if it looks like
	 * a viable service block.
	 */
	if (svcblk->magic_number != SERVICE_BLOCK_MAGIC_NUMBER) {
		clulog(LOG_ERR, "readServiceBlock: Invalid magic # 0x%lx.\n",
			svcblk->magic_number);
		return(-2);
	}
	if (svcblk->svcblk.id != svcNum) {
		clulog(LOG_EMERG, "readServiceBlock: Service number mismatch %d, %d.\n",
			svcNum, svcblk->svcblk.id);
		printServiceBlock(svcblk);
		return(-2);
	}
	return(0);
}

/*
 * Debug routine: log the interesting fields of one service block at
 * LOG_DEBUG level.
 *
 * svcblk - block to print (must not be NULL).
 *
 * The block may be corrupt (readServiceBlock() dumps blocks whose service
 * number does not match), so the state is range-checked before it is used
 * to index serviceStateStrings; an out-of-range state prints as "unknown".
 */
void printServiceBlock(DiskServiceBlock *svcblk) {
    const char *stateName = "unknown";

    /*
     * Same valid range that writeServiceBlock() enforces.
     * NOTE(review): assumes serviceStateStrings has an entry for every
     * state in 0 .. SVC_LAST_STATE -- confirm against its definition.
     */
    if ((svcblk->svcblk.state >= 0) &&
        (svcblk->svcblk.state <= SVC_LAST_STATE)) {
        stateName = serviceStateStrings[svcblk->svcblk.state];
    }

    clulog(LOG_DEBUG, "------ Service Block ------------\n");
    clulog(LOG_DEBUG, "magic# = 0x%lx\n", svcblk->magic_number);
    clulog(LOG_DEBUG, "version = %d\n", svcblk->version);
    clulog(LOG_DEBUG, "Service Number = %d\n", svcblk->svcblk.id);
    clulog(LOG_DEBUG, "owner = %d\n", svcblk->svcblk.owner);
    clulog(LOG_DEBUG, "state = %d, %s\n", svcblk->svcblk.state, stateName);
    clulog(LOG_DEBUG, "------------------------------\n");
}

/*
 * Debug routine: walk every service slot on disk and log each block that
 * is in use, followed by the number of such blocks.
 *
 * Initializes the subsystem on demand.  The walk stops at the first block
 * that cannot be read, in which case neither the count nor the closing
 * banner is logged.
 */
void printServiceList(void) {
    DiskServiceBlock blk;
    int activeCount = 0;
    int idx;

    if ((subsysInitialized == 0) && (initServiceSubsys() != 0)) {
        clulog(LOG_ERR, "printServiceList: Subsystem init failure.\n");
        return;
    }

    clulog(LOG_DEBUG, "==================== Service List ============================\n");
    for (idx = 0; idx < MAX_SERVICES; idx++) {
        if (readServiceBlock(idx, &blk) != 0) {
            clulog(LOG_DEBUG, "printServiceList: unable to read block %d.\n", idx);
            return;
        }
        /*
         * An uninitialized entry is just an empty slot in the service
         * array, not a real service; only the others are counted and shown.
         */
        if (blk.svcblk.state != SVC_UNINITIALIZED) {
            activeCount++;
            printServiceBlock(&blk);
        }
    }
    clulog(LOG_DEBUG, "Service Count = %d\n",activeCount);
    clulog(LOG_DEBUG, "==============================================================\n");
}

/*
 * Initialize the on-disk data structures representing services being served.
 * This will later be overwritten when the disk service status subsystem
 * is initialized.  Its main purpose is to wipe the disk to a clean slate.
 *
 * Every one of the MAX_SERVICES slots is rewritten with a freshly
 * initialized (SVC_UNINITIALIZED) block.  The on-disk offset of each slot
 * is computed inside writeServiceBlock() from the service number.
 * NOTE(review): writeServiceBlock() asserts that the cluster lock is held,
 * so callers presumably must hold it -- confirm.
 *
 * Returns: 0 on success, otherwise the non-zero result of the first
 *	    failing writeServiceBlock() call.
 */
int initializePartitionServiceBlocks(void) {
    DiskServiceBlock servb;
    int retval;
    int i;

    /*
     * Just wiping out any prior settings.
     */
    for (i = 0; i < MAX_SERVICES; i++) {
        initServiceBlock(&servb, i);
        retval = writeServiceBlock(&servb);
        if (retval != 0) {
            clulog(LOG_CRIT, "initializePartitionServiceBlocks: unable to initialize partition service blocks.\n");
            return(retval);
        }
    }
    clulog(LOG_DEBUG, "initializePartitionServiceBlocks: successfully initialized %d service blocks.\n", MAX_SERVICES);
    return(0);
}
/*
 * Read the on-disk data structures representing all services.
 * This is called periodically as part of the read/repair service.
 *
 * The contents read are discarded; only the success or failure of each
 * read matters here.  The on-disk offset of each slot is computed inside
 * readServiceBlock() from the service number.
 *
 * Returns: 0 on success, otherwise the non-zero result of the first
 *	    failing readServiceBlock() call.
 */
int readAllServiceBlocks(void) {
    DiskServiceBlock servb;
    int retval;
    int i;

    /*
     * Touch every service slot in turn, stopping at the first failure.
     */
    for (i = 0; i < MAX_SERVICES; i++) {
        retval = readServiceBlock(i, &servb);
        if (retval != 0) {
            clulog(LOG_ERR, "readAllServiceBlocks: unable to read partition service blocks.\n");
            return(retval);
        }
    }
    return(0);
}

/*
 * Externally accessible API used to retrieve the memory resident version
 * of a service description.  Note: this differs from the on-disk version!
 *
 * svcNum - service number to look up, 0 .. MAX_SERVICES-1.
 * svcblk - destination for the service description; filled in only on
 *	    success.
 *
 * Returns: 0 - success
 *	   -1 - service description is not active, meaning that it was never
 *		added before, or if so it has been since deleted. Or an
 *		invalid parameter was passed (including a NULL svcblk), or
 *		the block on disk failed validation, or the read kept
 *		failing.
 *	   -2 - the subsystem could not be initialized.
 * Side Effect: Calls consider_shutdown() when the block still cannot be
 *		read after max_consecutive_io_errors attempts.
 */
int getServiceStatus(int svcNum, ServiceBlock *svcblk) {
    DiskServiceBlock diskSvcBlk;
    int retval;
    int retries = 0;

    if ((svcNum < 0) || (svcNum >= MAX_SERVICES)) {
        clulog(LOG_ERR, "getServiceStatus: Invalid service number %d.\n",
                svcNum);
        return(-1);
    }
    if (svcblk == NULL) {
        clulog(LOG_ERR, "getServiceStatus: NULL service block, service number %d.\n",
                svcNum);
        return(-1);
    }
    if (subsysInitialized == 0) {
        if (initServiceSubsys() != 0) {
            clulog(LOG_ERR, "getServiceStatus: Subsystem init failure.\n");
            return(-2);
        }
    }
    while (retries++ < max_consecutive_io_errors) {
        retval = readServiceBlock(svcNum, &diskSvcBlk);
        if (retval == 0) {
            if (diskSvcBlk.svcblk.state == SVC_UNINITIALIZED) {
                return(-1);
            }
            /* Hand back only the embedded description, not the disk wrapper. */
            memcpy(svcblk, &diskSvcBlk.svcblk, sizeof(ServiceBlock));
            return(0);
        }
        if (retval == -2) { // invalid parameter or block; retrying won't help
            return(-1);
        }
    }
    /*
     * Inability to read from the shared state partition constitutes
     * unsafe operation.  Initiate a clean shutdown in the hopes that
     * some cleanup can be done before we inevitably get shot.
     */
    consider_shutdown("Cluster Instability: can't read service status block.");
    return(-1);
}

/*
 * Externally accessible API used to write the memory resident version
 * of a service description out to disk.
 *
 * svcblk - service description to store; its id selects the on-disk slot.
 *	    A state of SVC_UNINITIALIZED is rejected here (removeService()
 *	    is the routine that writes such a block).
 *
 * Returns: 0 - success
 *	   -1 - invalid service description (including a NULL svcblk), or
 *		the write kept failing.
 *	   -2 - the subsystem could not be initialized.
 * Side Effect: Calls consider_shutdown() when the block still cannot be
 *		written after max_consecutive_io_errors attempts.
 */
int setServiceStatus(ServiceBlock *svcblk) {
    DiskServiceBlock diskSvcBlk;
    int retval;
    int svcNum;
    int retries = 0;

    if (svcblk == NULL) {
        clulog(LOG_ERR, "setServiceStatus: NULL service block.\n");
        return(-1);
    }
    svcNum = svcblk->id;
    if ((svcNum < 0) || (svcNum >= MAX_SERVICES)) {
        clulog(LOG_ERR, "setServiceStatus: Invalid service number %d.\n",
                svcNum);
        return(-1);
    }
    if (svcblk->state == SVC_UNINITIALIZED) {
        clulog(LOG_ERR, "setServiceStatus: Invalid state %d, service number %d.\n",
                svcblk->state, svcNum);
        return(-1);
    }
    if (subsysInitialized == 0) {
        if (initServiceSubsys() != 0) {
            clulog(LOG_CRIT, "setServiceStatus: Subsystem init failure.\n");
            return(-2);
        }
    }
    /*
     * Build the on-disk wrapper (magic number, version) and then drop the
     * caller's description into it.
     */
    initServiceBlock(&diskSvcBlk, svcNum);
    memcpy(&diskSvcBlk.svcblk, svcblk, sizeof(ServiceBlock));
    while (retries++ < max_consecutive_io_errors) {
        retval = writeServiceBlock(&diskSvcBlk);
        if (retval == 0) {
            return(0);
        }
        if (retval == -2) { // invalid parameter; retrying won't help
            return(-1);
        }
    }
    /*
     * Inability to write to the shared state partition constitutes
     * unsafe operation.  Initiate a clean shutdown in the hopes that
     * some cleanup can be done before we inevitably get shot.
     */
    consider_shutdown("Cluster Instability: can't write service status block.");
    return(-1);
}

/*
 * Externally accessible API used to delete a service description from the
 * on-disk representation.
 *
 * The current block is read first so that removing a service which is
 * already absent is reported as an error; the slot is then overwritten
 * with a freshly initialized (SVC_UNINITIALIZED) block.
 *
 * svcNum - service number to remove, 0 .. MAX_SERVICES-1.
 *
 * Returns: 0 - success
 *	   -1 - service description on disk does not represent an active service
 *	        or invalid parameter, or subsystem init failure, or the
 *	        read/write kept failing.
 * Side Effect: Calls consider_shutdown() when the block still cannot be
 *		read or written after max_consecutive_io_errors attempts.
 */
int removeService(int svcNum) {
    DiskServiceBlock svcblk;
    /*
     * Start out as "failed" so that a non-positive retry limit, which
     * skips the read loop entirely, is handled as a read failure instead
     * of testing an uninitialized value.
     */
    int retval = -1;
    int retries = 0;

    clulog(LOG_DEBUG, "removeService: removing service number %d.\n",svcNum);
    if ((svcNum < 0) || (svcNum >= MAX_SERVICES)) {
        clulog(LOG_ERR, "removeService: Invalid service number %d.\n",
                svcNum);
        return(-1);
    }
    if (subsysInitialized == 0) {
        if (initServiceSubsys() != 0) {
            clulog(LOG_ERR, "removeService: Subsystem init failure.\n");
            return(-1);
        }
    }
    /*
     * Fetch the current block.  readServiceBlock() zeroes the buffer
     * itself, so no pre-initialization is needed here.
     */
    while (retries++ < max_consecutive_io_errors) {
        retval = readServiceBlock(svcNum, &svcblk);
        if (retval == 0) {
            break;
        }
        if (retval == -2) { // invalid parameter or block; retrying won't help
            return(-1);
        }
    }
    if (retval != 0) {
        /*
         * Inability to read from the shared state partition constitutes
         * unsafe operation.  Initiate a clean shutdown in the hopes that
         * some cleanup can be done before we inevitably get shot.
         */
        consider_shutdown("Cluster Instability: can't read service status block to remove service.");
        return(-1);
    }
    if (svcblk.svcblk.state == SVC_UNINITIALIZED) {
        clulog(LOG_ERR, "removeService: service %d already removed.\n", svcNum);
        return(-1);
    }
    // Set state to uninitialized
    initServiceBlock(&svcblk, svcNum);
    retries = 0;
    while (retries++ < max_consecutive_io_errors) {
        retval = writeServiceBlock(&svcblk);
        if (retval == 0) {
            return(0);
        }
        if (retval == -2) { // invalid parameter; retrying won't help
            return(-1);
        }
    }
    /*
     * Inability to write to the shared state partition constitutes
     * unsafe operation.  Initiate a clean shutdown in the hopes that
     * some cleanup can be done before we inevitably get shot.
     */
    consider_shutdown("Cluster Instability: can't remove service status block.");
    return(-1);
}


