/*
 *	Ohio Trollius
 *	Copyright 1996 The Ohio State University
 *	RBD/GDB
 *
 *	$Id: asc_run.c,v 6.1 96/11/22 13:56:41 nevin Rel $
 *
 *	Function:	- application execution
 */

#include <stdio.h>
#include <stdlib.h>

#include <all_list.h>
#include <args.h>
#include <app_mgmt.h>
#include <app_schema.h>
#include <bfreq.h>
#include <freq.h>
#include <kio.h>
#include <net.h>
#include <portable.h>
#include <preq.h>
#include <terror.h>
#include <typical.h>

/*
 * global functions
 */
int			asc_ger();	/* set GER buffer limits for an app. */
int			asc_run();	/* start all processes of an app. */

/*
 * external variables
 */
extern struct kio_t	_kio;			/* kernel I/O block */ 

/*
 * local functions
 */
static void		fdincr();	/* adjust stdio file desc. counts */
static void		starterror();	/* print can't start program error */

/*
 *	asc_ger
 *
 *	Function:	- sets GER buffer limits for application
 *			- counts the processes the schema places on each
 *			  node and sizes that node's buffer space to match
 *	Accepts:	- application schema
 *			- error printing flag
 *	Returns:	- 0 or LAMERROR
 */
int
asc_ger(appd, flag_print)

LIST			*appd;
int			flag_print;

{
	int4		anode;			/* LOCAL filtered out */
	int4		nnodes;			/* # nodes in session */
	int4 *		nodes = 0;		/* the node IDs */
	int		i;
	int *		nprocs_node = 0;	/* # procs on each node */
	int		nprocs;			/* # procs in application */
	int		ret = LAMERROR;		/* value handed back */
	int		err_save;		/* errno kept across free() */
	struct aschema *p;
	struct bfparms	bfp;			/* bufferd parameters */
/*
 * Get an array of nodeids and allocate a mirror array of process counts.
 * A negative node count can never be valid (presumably it is how getnall()
 * reports failure) and must not be turned into a huge unsigned size.
 */
	nnodes = getnall();
	if (nnodes < 0) return(LAMERROR);

	nodes = (int4 *) malloc((unsigned) (nnodes * sizeof(int4)));
	if (nodes == 0) goto cleanup;

	if (getall(nodes, nnodes)) goto cleanup;

	nprocs_node = (int *) malloc((unsigned) (nnodes * sizeof(int)));
	if (nprocs_node == 0) goto cleanup;

	for (i = 0; i < nnodes; i++) nprocs_node[i] = 0;

	nprocs = al_count(appd);
/*
 * Tally the number of processes on each node.
 */
	p = (struct aschema *) al_top(appd);

	while (p) {
/*
 * Resolve LOCAL once per process; it does not change during the search.
 */
		anode = (p->asc_node == LOCAL) ? getnodeid() : p->asc_node;
/*
 * Nodeids are not necessarily 0 to N-1, so we have to search for it.  (BFI)
 */
		for (i = 0; i < nnodes; ++i) {
			if (anode == nodes[i]) break;
		}

		if (i >= nnodes) {
			errno = EBADNODE;

			if (flag_print) {
				fprintf(stderr, "mpirun: %s: ",
						mnemonic(p->asc_node));
				terror("");
			}

			goto cleanup;
		}

		nprocs_node[i]++;
		p = (struct aschema *) al_next(appd, p);
	}
/*
 * Adjust the buffer limit on each node.
 */
	for (i = 0; i < nnodes; i++) {

		if (nprocs_node[i] == 0) continue;

		bfp.bfp_maxspace =
/*
 * the basic GER requirement
 */
			(MPI_GER * (MAXNMSGLEN + sizeof(struct nmsg)) *
			nprocs * nprocs_node[i]) +
/*
 * GER flow control messages - This one is tricky.  For processes
 * on your own node, you will only see the envelopes or the flow control
 * messages, but not max of both.  Since we have already covered local
 * processes above, we need not count them again here.
 */
			((MPI_GER - 1) * nprocs * (nprocs - nprocs_node[i]) *
			sizeof(struct nmsg)) +
/*
 * one packet for consumables to move through the buffer daemon
 */
			sizeof(struct nmsg) + MAXNMSGLEN +
/*
 * slush to cover up my mistakes
 */
			0x100000;

		bfp.bfp_maxspace = max(bfp.bfp_maxspace, BFDEFSPACE);

		if (rbfparms(nodes[i], &bfp)) {

			if (flag_print) {
				fprintf(stderr,
			    "mpirun: cannot allocate GER (%d bytes) on %s: ",
						bfp.bfp_maxspace,
						mnemonic(nodes[i]));
				terror("");
			}

			goto cleanup;
		}
	}

	ret = 0;
/*
 * Single exit: release both arrays on success and on every failure.
 * errno is preserved so the caller still sees the original error code.
 */
cleanup:
	err_save = errno;
	free((char *) nodes);
	free((char *) nprocs_node);
	errno = err_save;

	return(ret);
}

/*
 *	asc_run
 *
 *	Function:	- launch every process listed in the app. schema
 *			- all or nothing: if one process cannot be started,
 *			  the ones already running are doomed
 *	Accepts:	- app. schema desc.
 *			- number of parents in parent world
 *			- run-time environment flag
 *			- verbose flag
 *			- print error flag
 *			- GPS array, one entry is filled per started process
 *	Returns:	- 0 or LAMERROR
 */
int
asc_run(appd, nparent, rtf, verbose, prerror, pgps)

LIST			*appd;
int			nparent;
int4			rtf;
int			verbose;
int			prerror;
struct _gps		*pgps;

{
	struct aschema *proc;			/* current schema entry */
	struct _gps *	slot;			/* GPS entry being filled */
	int4		old_world;		/* ki_world on entry */
	int4		old_parent;		/* ki_parent on entry */
	int4		total;			/* # procs in schema */
	int		nstarted;		/* # procs started so far */
	int		pid;			/* process ID */
	int		idx;			/* process index */
/*
 * A missing or empty schema is an invalid argument.
 */
	if (appd == 0) {
		errno = EINVAL;
		return(LAMERROR);
	}

	if (al_count(appd) == 0) {
		errno = EINVAL;
		return(LAMERROR);
	}
/*
 * Remember the kernel I/O block settings, then load the values that are
 * in effect while the processes are being started.
 */
	old_world = _kio.ki_world;
	old_parent = _kio.ki_parent;

	_kio.ki_parent = nparent;
	_kio.ki_world = al_count(appd);
/*
 * Bump the stdio counts by the number of processes about to start.
 */
	fdincr(_kio.ki_world);
/*
 * Walk the schema, starting one process per entry.  Stop at the first
 * failure; nstarted then holds the number of processes that did start.
 */
	nstarted = 0;
	proc = (struct aschema *) al_top(appd);

	while (proc) {

		if (rploadgo(proc->asc_srcnode, proc->asc_node,
				proc->asc_args->apa_argv[0], rtf,
				proc->asc_args->apa_argv, &pid, &idx)) {
			proc->asc_errno = errno;
			starterror(proc->asc_args->apa_argv[0],
					proc->asc_node, prerror);
			break;
		}
/*
 * Record where the process lives; LOCAL is replaced by the real node ID.
 */
		slot = pgps + nstarted;

		if (proc->asc_node == LOCAL) {
			slot->gps_node = getnodeid();
		} else {
			slot->gps_node = proc->asc_node;
		}

		slot->gps_pid = pid;
		slot->gps_idx = idx;
		slot->gps_grank = nstarted;

		if (verbose) {
			printf("%d %s running on %s\n", pid,
					proc->asc_args->apa_argv[0],
					mnemonic(proc->asc_node));
		}

		++nstarted;
		proc = (struct aschema *) al_next(appd, proc);
	}
/*
 * Put the kernel I/O block back the way it was found.
 */
	_kio.ki_world = old_world;
	_kio.ki_parent = old_parent;
/*
 * Everything started: done.
 */
	total = al_count(appd);

	if (nstarted >= total) return(0);
/*
 * Not every process was started, so undo the stdio counts and doom the
 * running ones.  This should not really be done here and we should simply
 * return the number of processes successfully started.  To help avoid a
 * race with processes entering the kernel, sleep before dooming them.
 */
	fdincr(- total);
	sleep(1);
	app_doom(nstarted, pgps);

	return(LAMERROR);
}

/*
 *	fdincr
 *
 *	Function:	- adjust the counts of the stdio file desc.
 *			- a descriptor is adjusted only when its entry in
 *			  the kernel I/O block is greater than 2
 *			  (presumably meaning it is not a native stdio
 *			  descriptor -- confirm against kio.h)
 *	Accepts:	- increment value (negative to decrement)
 */
static void
fdincr(incr)

int4			incr;

{
/*
 * stdin
 */
	if (_kio.ki_stdin > 2) {
		lam_rfincr(0, incr);
	}
/*
 * stdout
 */
	if (_kio.ki_stdout > 2) {
		lam_rfincr(1, incr);
	}
/*
 * stderr
 */
	if (_kio.ki_stderr > 2) {
		lam_rfincr(2, incr);
	}
}

/*
 *	starterror
 *
 *	Function:	- report on stderr that a program could not be
 *			  started, followed by the current error text
 *			- silent when the print error flag is off
 *	Accepts:	- file name
 *			- ID of node failed on
 *			- print error flag
 */
static void
starterror(file, nodeid, prerror)

char *			file;
int			nodeid;
int			prerror;

{
/*
 * Nothing to do unless the caller asked for error output.
 */
	if (!prerror) return;

	fprintf(stderr, "mpirun: cannot start %s on %s: ",
			file, mnemonic(nodeid));
	terror("");
}
