/* Implementation of TLLLex class.
   This file is part of TL, Tiggr's Library.
   Written by Tiggr <tiggr@es.ele.tue.nl>
   Copyright (C) 1995, 1996 Pieter J. Schoenmakers
   TL is distributed WITHOUT ANY WARRANTY.
   See the file LICENSE in the TL distribution for details.

   $Id: TLLLex.m,v 1.3 1998/04/19 07:42:20 tiggr Exp $  */

#import "tl/support.h"
#import <limits.h>
#import "tl/TLLLex.h"
#import "tl/TLCons.h"
#import "tl/TLLSubroutine.h"
#import "tl/TLLInvocation.h"
#import "tl/TLStringStream.h"
#import "tl/TLLTag.h"
#import "tl/subr.h"
#import <float.h>
#import <ctype.h>
#import <string.h>

#define GETCHAR()  \
  (nch == -1 ? [stream readByte] : ({int c = nch; nch = -1; c;}))

#define UNGETCHAR(C)  \
  do { if (nch != -1) abort (); else nch = (C); } while (0)

#define RESIZE()  \
  do								\
    if (next == end)						\
      {								\
	int cap = (end - buf) ? 2 * (end - buf) : 10;		\
	char *new = xrealloc (buf, cap * sizeof (*buf));	\
								\
	next = new + (next - buf);				\
	end = new + cap;					\
	buf = new;						\
      }								\
    while (0)

static id close_paren, close_bracket, at, dot, hash_bang;

/* The symbol returned by a lexer upon reaching EOF.  */
TLSymbol *Qlex_eof;

@implementation TLLLex

/******************** private methods ********************/

-readInternal
{
  int c;

  for (next = buf, c = GETCHAR (); c >= 0; c = GETCHAR ())
    switch (c)
      {
      case ';':
	do
	  c = GETCHAR ();
	while (c >= 0 && c != '\n');
	if (c < 0)
	  return (Qlex_eof);
	/* Drop through.  */
      case '\n':
	line++;
	/* Drop through.  */
      case 0:  case 1:  case 2:  case 3:  case 4:  case 5:  case 6:  case 7:
      case 8:  case 9:  case 11: case 12: case 13: case 14: case 15: case 16:
      case 17: case 18: case 19: case 20: case 21: case 22: case 23: case 24:
      case 25: case 26: case 27: case 28: case 29: case 30: case 31: case ' ':
	break;

      case '(':
	{
	  TLCons *list = nil, *last = nil;
	  int dots_ago, len;
	  id e;
	  
	  for (len = dots_ago = 0;;)
	    {
	      e = [self readInternal];
	      if (e == close_paren)
		break;
	      if (e == Qlex_eof)
		[self error: "%d: expected ')' before EOF (%s)", line, ERRMSG];
	      else if (e == close_bracket)
		[self error: "%d: expected `)' but found `]'", line];
	      else if (e == dot)
		if (dots_ago)
		  [self error: "%d: too many `.'", line];
		else
		  dots_ago = 1;
	      else
		{
		  len++;
		  if (!dots_ago)
		    e = CONS (e, nil);
		  else if (++dots_ago > 2)
		    [self error: "%d: too many items after `.'", line];

		  if (last)
		    [last setCdr: e];
		  else
		    list = e;
		  last = e;
		}
	    }
	  if (list && [list car] == at)
	    return ([TLLInvocation invocationWithCompressedList: [list cdr]
		     length: len - 1]);
	  return (list);
	}
	break;

      case '[':
        {
	  TLCons *list = nil, *last = nil;
	  int len;
	  id e;

	  /* Read all elements of the expression.  */
	  for (len = 0;;)
	    {
	      e = [self readInternal];
	      if (e == close_bracket)
		break;
	      if (e == Qlex_eof)
		[self error: "%d: expected ']' before EOF", line];
	      else if (e == close_paren)
		[self error: "%d: expected `]' but found `)'", line];
	      else
		{
		  len++;
		  e = CONS (e, nil);
		  if (last)
		    [last setCdr: e];
		  else
		    list = e;
		  last = e;
		}
	    }
	  return ([TLLInvocation invocationWithList: list length: len]);
        }
	break;

      case ')':
	return (close_paren);

      case ']':
	return (close_bracket);

      case '@':
	return (at);

      case '\'':
	{
	  id e = [self readInternal];
	  if (e == close_bracket || e == close_paren)
	    [self error: "%d: unbalanced parenthesis", line];
	  if (e == Qlex_eof)
	    [self error: "%d: unexpected EOF (%s)", line, ERRMSG];
	  return (CONS (Qquote, CONS (e, nil)));
	}

      case '"':
	for (c = GETCHAR (); c >= 0 && c != '"'; c = GETCHAR ())
	  {
	    RESIZE ();
	    switch (c)
	      {
	      case '\\':
		c = GETCHAR ();
		if (c == TL_EOF)
		  [self error: "%d: unexpected EOF after `\\' (%s)",
		   line, ERRMSG];
		switch (c)
		  {
		  case '\n': line++; break;
		  case 'f': *next++ = '\f'; break;
		  case 'n': *next++ = '\n'; break;
		  case 'r': *next++ = '\r'; break;
		  case 't': *next++ = '\t'; break;
		  case '"': *next++ = '"'; break;
		  case '\\': *next++ = '\\'; break;
		  default: *next++ = c; break;
		  case '0':
		  case '1':
		  case '2':
		  case '3':
		    *next = 0;
		    for (;;)
		      {
			if (c >= '0' && c <= '7')
			  *next = (*next << 3) | (c - '0');
			else
			  break;
			c = GETCHAR ();
		      }
		    if (c >= 0)
		      UNGETCHAR (c);
		    next++;
		    break;
		  }
		break;

	      case '\n':
		line++;
		/* Drop through.  */
	      default:
		*next++ = c;
		break;
	      }
	  }
	if (c < 0)
	  [self error: "%d: unexpected EOF (%s)", line, ERRMSG];
	return ([CO_TLString stringWithCString: buf length: next - buf]);
	
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
      case '.': case '+': case '-':
	{
	  enum
	    {
	      SEEN_START,
	      SEEN_SIGN,
	      SEEN_INT_DIGIT,
	      SEEN_DOT,
	      SEEN_FRAC_DIGIT,
	      SEEN_EXP,
	      SEEN_EXP_SIGN,
	      SEEN_EXP_DIGIT,
	      SEEN_ERROR
	    } state = SEEN_START;

	  do
	    {
	      RESIZE ();
	      *next++ = c;
	      switch (c)
		{
		case '+':
		case '-':
		  if (state == SEEN_START)
		    state = SEEN_SIGN;
		  else if (state == SEEN_EXP)
		    state = SEEN_EXP_SIGN;
		  else
		    state = SEEN_ERROR;
		  break;

		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
		  if (state <= SEEN_INT_DIGIT)
		    state = SEEN_INT_DIGIT;
		  else if (state <= SEEN_FRAC_DIGIT)
		    state = SEEN_FRAC_DIGIT;
		  else if (state <= SEEN_EXP_DIGIT)
		    state = SEEN_EXP_DIGIT;
		  else
		    state = SEEN_ERROR;
		  break;

		case '.':
		  state = state <= SEEN_INT_DIGIT ? SEEN_DOT : SEEN_ERROR;
		  break;

		case 'e': case 'E':
		case 'f': case 'F':
		  state = (state < SEEN_EXP && state > SEEN_SIGN
			   ? SEEN_EXP : SEEN_ERROR);
		  break;

		default:
		  state = SEEN_ERROR;
		  break;
		}
	      c = GETCHAR ();
	    } while (state != SEEN_ERROR
		     && c > ' ' && c != ';'
		     && c != '(' && c != ')' && c != '[' && c != ']');
	  UNGETCHAR (c);

	  if (state != SEEN_ERROR && state > SEEN_SIGN)
	    if (state == SEEN_INT_DIGIT)
	      {
		/* See if this'll fit an int.  */
		int neg = *buf == '-', v, base = 10;
		char *n;

		if (neg)
		  for (v = 0, n = buf + 1; n < next; n++)
		    if ((INT_MIN + (*n - '0')) / base <= v)
		      v = v * base - (*n - '0');
		    else
		      break;
		else
		  for (v = 0, n = buf + (*buf == '+'); n < next; n++)
		    if ((INT_MAX - (*n - '0')) / base >= v)
		      v = v * base + (*n - '0');
		    else
		      break;

		if (n == next)
		  {
		    if (v >= TL_SMALL_INT_MIN && v <= TL_SMALL_INT_MAX)
		      return (tll_small_int[v]);
		    return ([CO_TLNumber numberWithInt: v]);
		  }
		else
		  {
		    /* Hopefully, this'll fit a long long.  */
		    long long lv;

		    for (lv = 0, n = buf + (neg || *buf == '+'); n < next; n++)
		      {
			long long nlv = lv * base + (*n - '0');
			if ((nlv  - (*n - '0')) / base != lv)
			  {
			    *next = 0;
			    [self error: "number too large: %s", buf];
			  }
			lv = nlv;
		      }
		    if (neg)
		      lv = -lv;
		    return ([CO_TLNumber numberWithLongLong: lv]);
		  }
	      }
	    else if (!(state == SEEN_DOT && next - buf == 1))
	      {
		static double powers[3][10] =
		  {
		    {1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9},
		    {1e0, 1e10, 1e20, 1e30, 1e40, 1e50, 1e60, 1e70, 1e80, 1e90},
		    {1e0, 1e100, 1e200, 1e300, 0, 0, 0, 0, 0, 0}
		  };

		int i, neg = *buf == '-', double_p;
		double exp, man = 0;
		char *n = buf;

		/* Get mantissa.  */
		if (neg)
		  {
		    for (n++; isdigit (*n); exp *= 10, n++)
		      man = 10 * man - (*n - '0');
		    if (*n == '.')
		      for (n++, exp = 0.1;
			   n < next && isdigit (*n); exp /= 10, n++)
			man -= (*n - '0') * exp;
		  }
		else
		  {
		    for (n += *n == '+'; isdigit (*n); n++)
		      man = 10 * man + (*n - '0');
		    if (*n == '.')
		      for (n++, exp = 0.1;
			   n < next && isdigit (*n); exp /= 10, n++)
			man += (*n - '0') * exp;
		  }

		if (n == next)
		  double_p = 1;
		else
		  {
		    /* Handle `e' or `f' exponent indication.  */
		    double_p = !(*n == 'f' || *n == 'F');

		    /* Handle exponent sign.  */
		    if (++n != next)
		      {
			neg = *n == '-';
			if (*n == '-' || *n == '+')
			  n++;
		      }

		    /* The value of the exponent does not matter if the
                       mantissa is 0.  */
		    if (!man)
		      n = next;

		    /* Skip leading zeroes.  */
		    while (n != next && *n == '0')
		      n++;

		    /* Read exponent.  */
		    if (n != next)
		      {
			char *start;

			for (start = n; n != next; n++);
			if (n != start)
			  {
			    if (neg)
			      for (n--, i = 0; n >= start; n--, i++)
				if (i > 2 || !powers[i][*n - '0']
				    || (DBL_MIN * powers[i][*n - '0']
					> (man < 0 ? -man : man)))
				  break;
				else
				  man /= powers[i][*n - '0'];
			    else
			      for (n--, i = 0; n >= start; n--, i++)
				if (i > 2 || !powers[i][*n - '0']
				    || (DBL_MAX / powers[i][*n - '0']
					< (man < 0 ? -man : man)))
				  break;
				else
				  man *= powers[i][*n - '0'];
			    if (n >= start)
			      if (man < 0)
				man = neg ? -DBL_MIN : -DBL_MAX;
			      else
				man = neg ? DBL_MIN : DBL_MAX;
			  }
		      }
		  }

		if (!double_p)
		  if (man > FLT_MAX)
		    man = FLT_MAX;
		  else if (man < -FLT_MAX)
		    man = -FLT_MAX;
		  else if (man > 0)
		    {
		      if (man < FLT_MIN)
			man = FLT_MIN;
		    }
		  else if (man < 0)
		    {
		      if (man > -FLT_MIN)
			man = FLT_MIN;
		    }

		return (double_p
			? [CO_TLNumber numberWithDouble: man]
			: [CO_TLNumber numberWithFloat: man]);
	      }
	  c = *--next;
	}
	/* Drop through.  */
      default:
        {
	  do
	    {
	      RESIZE ();
	      switch (c)
		{
		case '\\':
		  c = GETCHAR ();
		  switch (c)
		    {
		    case TL_EOF:
		      [self error: "%d: unexpected EOF after `\\' (%s)",
		       line, ERRMSG];
		    case '\n': line++;
		    default: *next++ = c; break;
		    }
		  break;
		default:
		  *next++ = c;
		  break;
		}
	      c = GETCHAR ();
	    } while (c > ' ' && c != ';' && c != '(' && c != ')'
		     && c != '[' && c != ']');
	  /* Don't unget a space.  */
	  if (c >= 0 && c != ' ')
	    UNGETCHAR (c);

	  {
	    TLSymbol *s = [CO_TLSymbol
			    symbolWithName: [CO_TLString stringWithCString: buf
						      length: next - buf]];
	    /* Cater for `#!' starting a file.  */
	    if (!line && s == hash_bang)
	      {
		nch = ';';
		return ([self readInternal]);
	      }
	    return (s == Qnil ? nil : s);
	  }
	}
      }

  return (Qlex_eof);
}

/******************** public methods ********************/

+initialize
{
  close_paren = [CO_TLSymbol symbolWithName: @"*lex-close-paren*"];
  close_bracket = [CO_TLSymbol symbolWithName: @"*lex-close-bracket*"];
  Qlex_eof = [CO_TLSymbol symbolWithName: @"*lex-eof*"];
  at = [CO_TLSymbol symbolWithName: @"@"];
  dot = [CO_TLSymbol symbolWithName: @"."];
  hash_bang = [CO_TLSymbol symbolWithName: @"#!"];
  return (self);
} /* +initialize */

+(TLLLex *) lexerWithStream: (id <TLInputStream>) s
{
  return ([[self gcAlloc] initWithStream: s]);
} /* -lexerWithStream: */

+(TLLLex *) lexerWithString: (id <TLString>) s
{
  return ([[self gcAlloc]
	   initWithStream: [TLStringStream streamWithString: s]]);
} /* -lexerWithString: */

-initWithStream: (id <TLInputStream>) s
{
  nch = -1;
  ASGN_IVAR (stream, s);
  return (self);
} /* -initWithStream: */

-line
{
  return ([CO_TLNumber numberWithInt: 1 + line]);
} /* -line */

-(void) print: (id <TLMutableStream>) s quoted: (BOOL) qp
{
  formac (s, @"#<%s %p at line %d on %#>",
	  class_get_class_name (isa), self, line, stream);
} /* -print:quoted: */

-read
{
  id r = [self readInternal];
  if (r == close_bracket || r == close_paren)
    [self error: "%d: unbalanced parenthesis", line];
  return (r);
} /* -read */

-(id <TLInputStream>) stream
{
  return (stream);
} /* -stream */

/******************** garbage collection ********************/

-(void) dealloc
{
  xfree (buf);
} /* -dealloc */

-(void) gcReference
{
  MARK (stream);
} /* -gcReference */

@end
