/*
  Copyright Mission Critical Linux, 2000

  Kimberlite is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
  Free Software Foundation; either version 2, or (at your option) any
  later version.

  Kimberlite is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with Kimberlite; see the file COPYING.  If not, write to the
  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
  MA 02139, USA.
*/
/*
 *
 *  Copyright (C) 2000 Mission Critical Linux, LLC
 *
 *  author: Tim Burke <burke@missioncriticallinux.com>
 *
 *  diskcheck.c
 *
 *  $Revision: 1.6 $
 *
 * Background:
 * For redundancy protection against user error, we have a "primary"
 * quorum partition and a "shadow" quorum partition.  All writes are issued
 * to both partitions.  Reads are randomly issued to either primary or shadow
 * partition. Should that fail a read will be issued to the other partition.
 * If that succeeds, the corrupted block will be repaired by copying in from
 * the correct version.
 *
 * What this module does:
 * Corruption on either of the 2 partitions is repaired only on read requests.
 * For infrequently accessed regions of the partition, a corruption could
 * go undetected (hence unrepaired), leaving you down to only one good copy
 * for extended periods of time.  This module simply "sweeps" across the
 * disk by reading the whole thing in.  Any corrupted portions should then
 * be repaired.  Since this is called out of the main body of the quorum
 * daemon, it does not sweep the whole disk at one time.  Rather, each time
 * it is called a separate region of the disk is read.  Over time there should
 * be complete coverage.
 */
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/errno.h>
#include <signal.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/syslog.h>

#include <logger.h>
#include <sys/syslog.h>
#include "diskstate.h"
#include "disk_proto.h"
#include <msgsvc.h>
#include <clusterdefs.h>
#include <parseconf.h>
#include <power.h>
#include <logger.h>

static const char *version __attribute__ ((unused)) = "$Id: diskcheck.c,v 1.6 2000/09/07 23:25:49 winchell Exp $";

/*
 * Forward routine declarations.
 */

/*
 * scanHeader - sweep the shared-state header.
 *
 * Makes a single attempt to take the cluster lock with clu_try_lock().  If
 * the lock is not obtained, this pass is skipped and nothing is logged.
 * With the lock held, the header is read through readHeader() into a
 * throw-away local: a negative return is logged at LOG_ERR, anything else
 * at LOG_DEBUG.  The lock is released before returning.
 */
static void scanHeader(void)
{
	SharedStateHeader scratch;	/* contents are never examined here */

	if (clu_try_lock() != LOCK_SUCCESS)
		return;

	if (readHeader(&scratch) >= 0)
		clulog(LOG_DEBUG, "scanHeader: read disk header.\n");
	else
		clulog(LOG_ERR, "scanHeader: read of disk header failed.\n");

	clu_un_lock();
}
/*
 * scanStatusBlocks - sweep every node status block.
 *
 * Makes a single attempt to take the cluster lock; if that fails the pass
 * is skipped.  With the lock held, readStatusBlock() is called once for
 * each of the MAX_NODES slots, starting at OFFSET_FIRST_STATUS_BLOCK and
 * stepping by SPACE_PER_STATUS_BLOCK.  Both the data read and the return
 * value of readStatusBlock() are discarded, so failures are not logged
 * by this routine.
 */
static void scanStatusBlocks(void)
{
	NodeStatusBlock scratch;	/* contents are never examined here */
	int node;

	if (clu_try_lock() != LOCK_SUCCESS)
		return;

	node = 0;
	while (node < MAX_NODES) {
		/* Third argument is 0; its meaning is defined by readStatusBlock(). */
		readStatusBlock(OFFSET_FIRST_STATUS_BLOCK + (node * SPACE_PER_STATUS_BLOCK),
				&scratch, 0);
		node++;
	}

	clu_un_lock();
}

/*
 * scanServiceBlocks - sweep the full set of service blocks.
 *
 * Makes a single attempt to take the cluster lock; if that fails the pass
 * is skipped and nothing is logged.  With the lock held,
 * readAllServiceBlocks() is called: a negative return is logged at
 * LOG_ERR, anything else at LOG_DEBUG.  The lock is released before
 * returning.
 */
static void scanServiceBlocks(void)
{
	if (clu_try_lock() != LOCK_SUCCESS)
		return;

	if (readAllServiceBlocks() >= 0)
		clulog(LOG_DEBUG, "scanServiceBlocks: read service blocks.\n");
	else
		clulog(LOG_ERR, "scanServiceBlocks: read of service blocks failed.\n");

	clu_un_lock();
}

/*
 * scanDatabase - sweep the database header and contents.
 *
 * Calls readScanWholeDatabase() and logs the outcome: a negative return is
 * reported at LOG_ERR, anything else at LOG_DEBUG.
 *
 * Unlike the other scan helpers in this file, no cluster lock is taken
 * here.  NOTE(review): presumably readScanWholeDatabase() does its own
 * locking - confirm.
 */
static void scanDatabase(void)
{
	if (readScanWholeDatabase() >= 0) {
		clulog(LOG_DEBUG, "scanDatabase: read database.\n");
		return;
	}

	clulog(LOG_ERR, "scanDatabase: read of database failed.\n");
}

/*
 * scanWholeDisk - read in the full set of on-disk structures in one go.
 *
 * Runs every scan category back to back: lock repair, header, status
 * blocks, service blocks, database and finally the net block.  A progress
 * banner is printed to stdout before each step, and the start and end of
 * the sweep are logged at LOG_DEBUG.
 *
 * Unlike scanNextCategory(), this routine does not call
 * flipPreferredReadPartition() and applies no delay between categories.
 */
void scanWholeDisk(void)
{
	printf("--------Starting full disk scan ------------\n");
	clu_lock_repair();
	clulog(LOG_DEBUG, "scanWholeDisk: commence scan.\n");

	printf("--------Scanning Header ------------\n");
	scanHeader();

	printf("--------Scanning Status Blocks ------------\n");
	scanStatusBlocks();

	printf("--------Scanning Service Blocks ------------\n");
	scanServiceBlocks();

	printf("--------Scanning Database ------------\n");
	scanDatabase();

	printf("--------Scanning NetBlock ------------\n");
	/*
	 * The result is deliberately discarded; this routine has never acted
	 * on it.  NOTE(review): consider logging a failure here once the
	 * return convention of readRepairNetBlock() is confirmed.
	 */
	(void)readRepairNetBlock();

	printf("--------Completed full disk scan ------------\n");
	clulog(LOG_DEBUG, "scanWholeDisk: concluded scan.\n");
}
/*
 * scanNextCategory - read in the next category of on-disk structures.
 *
 * To avoid bogging down the disk pinging too long, only one category is
 * read per call for potential repair.  This routine keeps track of where
 * the previous call left off (in nextCategory) and continues with the next
 * category, wrapping around after the last one.
 *
 * Parameter - delaySecs - minimum number of seconds in between the reads of
 * successive categories.  This allows the routine to be called more
 * frequently while the logic deciding how often to perform the actual reads
 * stays buried here.  A value <= 0 disables the throttle: every call scans
 * one category and time_last_scan is left untouched.
 *
 * At the start of each cycle (category 0) flipPreferredReadPartition() is
 * called before the lock repair.  NOTE(review): presumably so successive
 * sweeps read from alternating partitions - confirm.
 */
static int nextCategory = 0;
time_t time_last_scan;
void scanNextCategory(int delaySecs) {
    /* Scan categories in the order they are visited within one cycle. */
    enum {
	SCAN_LOCK_REPAIR = 0,
	SCAN_HEADER,
	SCAN_STATUS_BLOCKS,
	SCAN_SERVICE_BLOCKS,
	SCAN_DATABASE,
	SCAN_NET_BLOCK,
	SCAN_CATEGORY_COUNT
    };
    time_t time_now;

    if (delaySecs > 0) {
	time_now = time(NULL);
	/*
	 * Only throttle while the clock has moved forward by less than
	 * delaySecs.  If the wall clock was stepped backwards the elapsed
	 * time is negative; treat that as "expired" and re-base
	 * time_last_scan, otherwise scanning would stall until the clock
	 * caught up with the old timestamp.
	 */
	if (time_now >= time_last_scan &&
	    (time_now - time_last_scan) < delaySecs) {
	    return;
	}
	time_last_scan = time_now;
    }

    /*
     * Defensive: restart the cycle if the cursor is ever out of range, so
     * that category 0 is actually executed rather than skipped.
     */
    if (nextCategory < 0 || nextCategory >= SCAN_CATEGORY_COUNT)
	nextCategory = SCAN_LOCK_REPAIR;

    if (nextCategory == SCAN_LOCK_REPAIR)
	flipPreferredReadPartition();

    switch (nextCategory) {
    case SCAN_LOCK_REPAIR:
	clu_lock_repair();
	break;
    case SCAN_HEADER:
	scanHeader();
	break;
    case SCAN_STATUS_BLOCKS:
	scanStatusBlocks();
	break;
    case SCAN_SERVICE_BLOCKS:
	scanServiceBlocks();
	break;
    case SCAN_DATABASE:
	scanDatabase();
	break;
    case SCAN_NET_BLOCK:
	/* Result deliberately discarded; this routine has never acted on it. */
	(void)readRepairNetBlock();
	break;
    default:
	/* Not reachable: nextCategory was normalised above. */
	break;
    }

    nextCategory = (nextCategory + 1) % SCAN_CATEGORY_COUNT;
}

