/*
  sorttable: sorts a NoSQL table on one or more columns.

  Copyright (c) 1998,2006 Carlo Strozzi

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 dated June, 1991.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

  $Id: sorttable.c,v 1.6 2006/03/10 11:26:13 carlo Exp $

*/

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/file.h>
#include <string.h>

/* Installation paths. Each one can be overridden at build time
   (e.g. with -DHELPDIR=...); the values below are only defaults. */

/* directory holding the help texts */
#ifndef HELPDIR
#define HELPDIR "/usr/local/nosql/help"
#endif
/* help text for this program; main() shows it through grep(1) on -h/--help */
#ifndef HELPFILE
#define HELPFILE (HELPDIR "/sorttable.txt")
#endif

/* directory holding the licensing documents */
#ifndef DOCDIR 
#define DOCDIR "/usr/local/nosql/doc"
#endif
/* file printed by --show-copying */
#ifndef COPYING
#define COPYING (DOCDIR "/COPYING")
#endif
/* file printed by --show-warranty */
#ifndef WARRANTY
#define WARRANTY (DOCDIR "/WARRANTY")
#endif

/* Allocation step used when growing the dynamic buffers: a byte count for
   the string buffers, an element count for the offset array.
   Do not set to less than sizeof(int) */
#define ALLOCSIZ 32

/* Size of the column-name work buffers, trailing zero included: the
   longest usable column name is therefore MAXCOLNAME-1 (30) characters,
   which is also the limit reported by etoolong() */
#define MAXCOLNAME 31

/* Working state shared by the header parser and the sort(1) argument
   builder. All buffers are allocated lazily by the routines that own them. */
typedef struct Header {

  /* string storage */
  char  *names;		/* column names, stored back to back, each one
			   zero-terminated */
  char  *cmd;		/* buffer for optional back-end cmd (not
			   referenced by the code in this file) */
  char  *args;		/* back-end arguments, stored back to back,
			   each one zero-terminated */
  char **argv;		/* pointer vector into *args, for execvp(3) */

  /* bookkeeping for *names */
  int    top;		/* index of last used byte in *names, -1 if empty */
  int    end;		/* growth bound for *names */

  /* bookkeeping for the per-column offsets */
  int   *offset;	/* start of each column name, relative to *names */
  int    otop;		/* index of last used slot in *offset, -1 if empty */
  int    oend;		/* growth bound for *offset */

  /* bookkeeping for *cmd */
  int    ctop;		/* top used location in *cmd (0-n) */
  int    cend;		/* growth bound for *cmd */

  /* bookkeeping for *args */
  int    atop;		/* index of last used byte in *args, -1 if empty */
  int    aend;		/* growth bound for *args */

  /* bookkeeping for argv[] */
  int    vtop;		/* index of last used slot in argv[] */
  int    vend;		/* growth bound for argv[] */
  int    argc;		/* counter kept by setargv(): number of argv[]
			   entries after the first one */
} Header;

/* Program-wide switches, one bit each */
struct {
  unsigned int header : 1,	/* non-zero: print the table header */
               debug  : 1;	/* non-zero: echo the sort(1) command line
				   to stderr */
} flags;

/* global variables */
static char *progname;		/* set to argv[0] by main(); used as the
				   prefix of diagnostic messages */

/* Function declarations */

/* Unbuffered input routine.
   Reads exactly one byte from file descriptor 0 with read(2), so that no
   input beyond that byte is consumed by this process.
   Returns the byte as a non-negative value, or EOF when read(2) does not
   deliver one byte (end of input or error). */
static int getch(void) {
  unsigned char byte;

  if (read(0, &byte, 1) != 1) return EOF;

  return byte;
}

/* Print the one-line usage synopsis on stderr.
   Always returns 1, so that callers can write exit(eusage()). */
static int eusage(void) {
  fprintf(stderr, "Usage: %s [options] column-list\n", progname);
  return 1;
}

/* Report on stderr that a column name does not fit the work buffers.
   The limit shown is MAXCOLNAME-1, i.e. the buffer size less the
   terminator. Always returns 1, so that callers can write
   exit(etoolong()). */
static int etoolong(void) {
  const int limit = MAXCOLNAME-1;

  fprintf(stderr, "%s: max. column name width (%d) exceeded\n",
        progname, limit);

  return 1;
}

/* Look up a column by name.
   h    - header previously filled by addcol()
   name - zero-terminated column name to search for
   Returns the 0-based position of the first matching column, or -1 if
   there is none. Slots holding a negative offset are skipped. */
static int colpos(Header *h, char *name) {

  int i;

  for (i = 0; i <= h->otop; i++) {

      /* test the offset before using it to address h->names */
      if (h->offset[i] < 0) continue;

      if (strcmp(name, h->names + h->offset[i]) == 0) return i;
  }

  return -1;
}

/* Add new column to the table header.
   h    - header to extend; its names/offset members are initialized on
          the very first call (the init state is function-static, so only
          one Header can be managed this way)
   name - zero-terminated column name; a name already present in the
          header is silently ignored
   Terminates the program with exit(1) if memory cannot be obtained. */

static void addcol(Header *h, char *name) {

  static int init=1;
  size_t len;

  /* init header if first time */

  if (init) {
     h->top = -1;
     h->otop = -1;
     h->end = ALLOCSIZ - 1;		/* bytes allocated for h->names */
     h->oend = ALLOCSIZ - 1;		/* last valid index in h->offset */

     if ((h->names = malloc(h->end)) == NULL ||
	  (h->offset = malloc(ALLOCSIZ*sizeof(int))) == NULL) {
	perror(progname);
	exit(1);
     }
     h->names[0] = '\0';			/* init string */
     init = 0;
  }

  /* ignore duplicated column names, if any */
  if (colpos(h, name) >= 0) return;

  len = strlen(name);

  /* the name goes to h->top+1 and its terminator to h->top+1+len, which
     must stay below h->end. Loop, because a long name may need more
     than one ALLOCSIZ step */

  while ((size_t)(h->top + 1) + len >= (size_t)h->end) {
     char *grown = realloc(h->names, (size_t)h->end + ALLOCSIZ);

     if (grown == NULL) {
	perror(progname);
	exit(1);
     }
     h->names = grown;
     h->end += ALLOCSIZ;
  }

  /* make room for one more offset; growing as soon as the next slot
     reaches h->oend keeps one spare slot at all times */

  if ((h->otop+1) >= h->oend) {
     int *grown = realloc(h->offset,
			   ((size_t)h->oend + ALLOCSIZ)*sizeof(int));

     if (grown == NULL) {
	perror(progname);
	exit(1);
     }
     h->offset = grown;
     h->oend += ALLOCSIZ;
  }

  /* add offset for the new column, relative to h->names */
  h->offset[++h->otop] = h->top + 1;

  /* add new name to column names, terminator included; h->top ends up
     on that terminator */
  memcpy(h->names + h->top + 1, name, len + 1);
  h->top += (int)len + 1;
}

/* Strip column-level options, if any.
   name is modified in place: it is cut at the first ':' it contains and
   left untouched when it contains none. */

static void delopts(char *name) {

  char *colon = strchr(name, ':');

  if (colon != NULL) *colon = '\0';
}

/* Extract column-level options, if any.
   dest - receives whatever follows the first ':' in src, or the empty
          string when src has no ':' or nothing after it
   src  - column specification; left unchanged
   max  - maximum number of characters appended to dest (the terminator
          comes on top of that); zero or negative means no limit */

static void colopts(char *dest, char *src, int max) {

  char *opts;

  dest[0] = '\0';			/* init target */

  opts = strchr(src, ':');

  /* nothing to do without a separator, or with nothing behind it */
  if (opts == NULL || *++opts == '\0') return;

  if (max <= 0) max = strlen(opts);

  strncat(dest, opts, max);
}

/* Add one back-end cmd argument to the argument buffer.
   h   - header whose args/atop/aend members are maintained here; they
         are initialized on the very first call (the init state is
         function-static, so only one Header can be managed this way)
   arg - zero-terminated argument of any length
   Arguments are stored back to back in h->args, each followed by its
   terminator; setargv() later builds the execvp(3) vector from them.
   Terminates the program with exit(1) if memory cannot be obtained. */

static void addarg(Header *h, char *arg) {

  static int init=1;
  size_t len = strlen(arg);

  /* init header if first time */

  if (init) {
     h->atop = -1;
     h->aend = ALLOCSIZ - 1;		/* bytes allocated for h->args */

     if ((h->args = malloc(h->aend)) == NULL) {
	perror(progname);
	exit(1);
     }
     h->args[0] = '\0';			/* init string (mandatory!) */
     init = 0;
  }

  /* the argument goes to h->atop+1 and its terminator to
     h->atop+1+len, which must stay below h->aend. Loop, because the
     argument comes from the command line and may need more than one
     ALLOCSIZ step */

  while ((size_t)(h->atop + 1) + len >= (size_t)h->aend) {
     char *grown = realloc(h->args, (size_t)h->aend + ALLOCSIZ);

     if (grown == NULL) {
	perror(progname);
	exit(1);
     }
     h->args = grown;
     h->aend += ALLOCSIZ;
  }

  /* add new arg to arg list, terminator included; h->atop ends up on
     that terminator */
  memcpy(h->args + h->atop + 1, arg, len + 1);
  h->atop += (int)len + 1;
}

/* Prepare **argv before exec'ing sort(1).
   h - header whose args buffer has been filled by addarg()
   Builds h->argv as a NULL-terminated vector with one pointer to the
   start of every argument stored in h->args. h->argc is left at the
   number of entries after the first one, i.e. the index of the last
   entry. With flags.debug set, the resulting command line is echoed on
   stderr.
   Terminates the program with exit(1) if memory cannot be obtained. */

static void setargv(Header *h) {

  int i = 0, end = 0;

  h->vtop = -1;
  h->argc = 0;

  /* start with room for ALLOCSIZ pointers */
  if ((h->argv = malloc(ALLOCSIZ * sizeof *h->argv)) == NULL) {
     perror(progname);
     exit(1);
  }
  h->vend = ALLOCSIZ - 1;		/* last valid index in h->argv[] */

  /* the first argument starts at the beginning of the buffer */
  h->argv[++h->vtop] = h->args;

  while (++i <= h->atop) {
     if (h->args[i] == '\0') {
	end = 1;			/* end of element detected */
	continue;
     }

     if (!end) continue;		/* still inside an element */

     /* dynamically resize h->argv[] as needed,
	and account for ending NULL */

     if ((h->vtop + 1) >= h->vend) {
	char **grown = realloc(h->argv,
		((size_t)h->vend + 1 + ALLOCSIZ) * sizeof *h->argv);

	if (grown == NULL) {
	   perror(progname);
	   exit(1);
	}
	h->argv = grown;
	h->vend += ALLOCSIZ;
     }

     /* first character after a terminator: a new element begins */
     h->argv[++h->vtop] = h->args + i;
     h->argc++;
     end = 0;
  }

  /* always append terminator, as mandated by execvp(3) */
  h->argv[h->vtop + 1] = NULL;

  if (flags.debug) {
     /* h->argc is the index of the last entry: print the others followed
	by a blank, then the last one followed by a newline */
     for (i=0; i < h->argc; i++) fprintf(stderr, "%s ", h->argv[i]);
     if (h->argc) fprintf(stderr, "%s\n", h->argv[i]);
  }
}

/* Write the table header to stdout, unless flags.header is off.
   Every column name is preceded by an SOH character (\001) and columns
   are separated by a TAB; columns whose offset is negative are left out.
   A newline closes the line if at least one column was written. */
static void printhdr(Header *h) {

  int col;
  int printed = 0;		/* number of columns written so far */

  if (!flags.header) return;

  for (col = 0; col <= h->otop; col++) {

     if (h->offset[col] < 0) continue;

     /* prepend TAB if not first column */
     if (printed == 0) printf("\001");
     else printf("\t\001");

     printf("%s", h->names + h->offset[col]);
     printed++;
  }

  /* add NL if at least one column */
  if (printed > 0) printf("\n");
}

/* Program entry point.
   1. Seeds the sort(1) command line with "sort", "-t<TAB>" and "-s".
   2. Handles the leading options: the ones known here are acted upon,
      any other one is handed over to sort(1) unchanged.
   3. Reads the table header from file descriptor 0, one byte at a time,
      so that the table body is left unread for sort(1).
   4. Turns every "column[:options]" given on the command line into a
      "+N<options> -N+1" key pair, N being the position of the column in
      the header; columns not found in the header are ignored.
   5. Prints the header and replaces this process with sort(1).
   Exit status: 1 on usage or run-time errors, 0 when the input ends
   before a complete header line has been read. */
int main(int argc, char *argv[]) {

  int i=0, j=0, k=0, m=0;
  int c;		/* int, not char: EOF must stay distinct from
			   every possible input byte */

  char tmpbuf[MAXCOLNAME] = "";	 	/* local work buffer */
  char tmpopt[MAXCOLNAME] = "";	 	/* column-specific options */
  char colnam[MAXCOLNAME] = "";	 	/* current column name */

  /* sort key argument: sign, position digits and column options */
  char keyarg[MAXCOLNAME + 16] = "";

  /* putenv(3) keeps the pointer it is given, hence static storage */
  static char posixlocale[] = "LC_ALL=POSIX";

  Header h;

  flags.header = 1; 		/* default it to print the header */
  flags.debug  = 0;

  progname = argv[0];

  sprintf(tmpbuf, "sort");		/* init h.argv */
  addarg(&h, tmpbuf);
  sprintf(tmpbuf, "-t\t");
  addarg(&h, tmpbuf);
  sprintf(tmpbuf, "-s");
  addarg(&h, tmpbuf);

  if (getenv("NOSQL_DEBUG") != NULL) {
     if (!strncmp(getenv("NOSQL_DEBUG"),"1",1)) flags.debug = 1;
  }

  while (++i < argc && *argv[i] == '-') {

    if (!strcmp(argv[i], "-x") ||
  	     !strcmp(argv[i], "--debug")) flags.debug = 1;

    else if (!strcmp(argv[i], "-N") ||
  	     !strcmp(argv[i], "--no-header")) flags.header = 0;

    else if (!strcmp(argv[i], "-i") ||
    	     !strcmp(argv[i], "--input")) {

      if (++i >= argc || *argv[i] == '-') exit(eusage());

      /* freopen(3) reports failure with a null pointer */
      if (freopen(argv[i], "r", stdin) == NULL) {
         perror(argv[i]);
         exit(1);
      }
    }

    else if (!strcmp(argv[i], "-o") ||
    	     !strcmp(argv[i], "--output")) {

      if (++i >= argc || *argv[i] == '-') exit(eusage());

      if (freopen(argv[i], "w", stdout) == NULL) {
    	 perror(argv[i]);
    	 exit(1);
      }
    }

    else if (!strcmp(argv[i], "-h") ||
    	     !strcmp(argv[i], "--help")) {

      execlp("grep","grep","-v","^#",HELPFILE,(char *) 0);
      perror("grep");
      exit(1);
    }
    else if (!strcmp(argv[i], "--show-copying")) {
      execlp("cat","cat",COPYING,(char *) 0);
      perror("cat");
      exit(1);
    }
    else if (!strcmp(argv[i], "--show-warranty")) {
      execlp("cat","cat",WARRANTY,(char *) 0);
      perror("cat");
      exit(1);
    }

    /* now consider global sort(1) options */
    else {
      if (!strcmp(argv[i], "-c")) flags.header = 0;
      addarg(&h, argv[i]);
    }
  }

  while ((c = getch()) != EOF) {

     /* ignore SOH and blank chars in header names */
     if (c == '\001' || c == ' ') continue;

     if (j >= MAXCOLNAME) exit(etoolong());

     if (c != '\t' && c != '\n') {
        tmpbuf[j++] = (char) c;
	continue;
     }

     tmpbuf[j] = '\0';			/* set terminator */
     addcol(&h, tmpbuf);		/* append to header */
     j = 0;

     if (c == '\n') {

	/* header complete: translate the rest of the command line */
	while (i < argc) {

	  int more = 0;		/* set when argv[i] holds further
				   comma-separated columns */

	  if (*argv[i] == '-') addarg(&h, argv[i]);	/* option */
	  else {				/* column name(s) */
	     /* handle comma-separated column list: take one element,
		advancing argv[i] past what has been consumed */
	     m = 0;
	     while (*argv[i]) {
	       if (*argv[i] == ',') {
		  argv[i]++;	/* skip comma */
		  more = 1;	/* multi-column argv entry */
		  break;
	       }
	       colnam[m++] = *argv[i];
	       if (m >= MAXCOLNAME) exit(etoolong());
	       argv[i]++;
	     }
	     colnam[m] = '\0';		/* top-off */

	     /* split "name:options": save the options first, then cut
		them off the name used for the header lookup */
	     colopts(tmpopt, colnam, (int) sizeof(tmpopt) - 1);
	     delopts(colnam);

	     if ((k=colpos(&h, colnam)) >= 0) {
		snprintf(keyarg, sizeof(keyarg), "+%d%s", k, tmpopt);
		addarg(&h, keyarg);
		snprintf(keyarg, sizeof(keyarg), "-%d", k+1);
		addarg(&h, keyarg);
	     }
	  }

	  /* stay on the same argv entry until it is used up */
	  if (!more) i++;
	}

	printhdr(&h);			/* print header to stdout */

        fflush(NULL);	/* Make sure the header is output */
	putenv(posixlocale);
	setargv(&h);			/* Prepare argv[] for sort(1) */
        execvp("sort", h.argv);
        perror("sort");
        exit(1);
     }
  }

  /* input ended before a complete header line: nothing to sort */
  exit(0);
}

/* EOF */
