/* glob.c */
/* wildcard matching routines */
/*
gtkfind - a graphical "find" program
Copyright (C) 1998  Matthew Grossman <mattg@oz.net>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/


/* this should be just a routine to match a text string with a
   wildcard-containing text string.  I want to have the capability to save
   portions of the match into a register, so you can access them later
   by using \1, \2, etc. just like in sed

   \0 should be the whole string

   we have a max of 10 registers...should be enough...

   wildcards: ? * {...} [...]

   registers: (...)

   you can use a \ to escape a special character
   
   Because of this (the special capabilities we want), we are not
   using a library

   when copy_stringp == 0, registers are disabled. () are just ignored
   in the pattern
*/

#include "glob.h"

/* number of register slots; slot 0 holds the whole string (see
   glob_string), the others are filled from (...) groups */
#define MAX_REGISTERS 10
/* NOTE(review): REGISTER_SIZE is not referenced by any code visible
   in this file -- confirm whether it is still needed */
#define REGISTER_SIZE 256

/* globals for keeping track of registers while glob() recurses:
   r_start[i] is set when glob() meets a '(' and points at the place in
   the subject string where register i begins; r_end[i] is set at the
   matching ')' and points at the place where it ends.  glob() also
   treats a non-NULL r_start[i] as "register i is already open" in
   order to reject nested parentheses.  Both tables are file-scope
   state shared by every call. */

static caddr_t r_start[MAX_REGISTERS];
static caddr_t r_end[MAX_REGISTERS];


static char *
strechr(char *s, char c)
     /* Return a pointer to the first occurrence of c in s that is not
        \-escaped, or NULL if there is none.

        The string is scanned left to right and a backslash always
        consumes the character after it, so in "\\," the comma is NOT
        escaped (the backslash in front of it is itself escaped).
        Looking only one byte back, as this routine used to, got that
        case wrong.

        When c is itself a backslash, a "\\" pair is skipped as a unit
        and the first backslash that is not followed by another
        backslash is returned.

        c == '\0' never matches; NULL is returned. */
{
  char *p = NULL;

  for(p = s; *p; p++) {
    if(*p == '\\') {
      if(c == '\\') {
        if(*(p + 1) != '\\')
          return(p);
        p++; /* step over the second half of the "\\" pair */
      }
      else if(*(p + 1) != '\0')
        p++; /* step over the escaped character, whatever it is */
    }
    else if(*p == c)
      return(p);
  }

  return(NULL);
}

static char *
stretok(char *s, char *tokens)
     /* strtok-like splitter that honours \-escapes.

        s       string to start splitting (it is modified in place), or
                NULL to continue with the string from the previous call
        tokens  set of delimiter characters

        Returns the next piece, or NULL when nothing is left.  The
        piece ends at the earliest unescaped delimiter, which is
        overwritten with '\0'.  Unlike strtok, empty pieces are
        returned (two adjacent delimiters yield "").

        The continuation point lives in a static variable, so only one
        string can be split at a time. */
{
  static char *mem = NULL;
  char *piece = NULL;
  char *cut = NULL;
  char *q = NULL;

  if(s)
    mem = s;
  else if(mem == NULL) /* nothing left from the previous string */
    return(NULL);

  piece = mem;

  /* pick the delimiter hit closest to the start of the piece, not
     simply the first delimiter listed in tokens */
  for(q = tokens; *q; q++) {
    char *hit = strechr(piece, *q);

    if(hit && (cut == NULL || hit < cut))
      cut = hit;
  }

  if(cut) {
    *cut = '\0';
    mem = cut + 1;
  }
  else
    mem = NULL; /* this was the last piece */

  return(piece);
}



char *
match_squares(char *pattern, char *string)
     /* See whether the [...] set at the start of pattern matches the
        next character of string.

        pattern  points at the opening '['
        string   points at the character to test

        Returns a pointer just past the closing ']' when the character
        is accepted, NULL otherwise.  NULL is also returned when string
        is already at its terminator (a set must consume one real
        character) and when the set has no closing ']'.

        Syntax:
          - a leading '!' or '^' negates the set and is not itself a
            member
          - a ']' in first position that is immediately followed by
            another ']' is a literal member
          - "a-z" is an inclusive range; a '-' in first position or
            directly before the closing ']' is a literal '-' */
{
  int negated = 0;
  int success = 0;
  char *p = pattern + 1;
  char *first = NULL;

  if(*string == '\0')
    return(NULL);

  if(*p == '!' || *p == '^') {
    negated = 1;
    p++;
  }
  first = p;

  for(;;) {
    if(*p == '\0')
      return(NULL); /* ran out of pattern before the closing ']' */

    if(*p == ']' && !(p == first && *(p + 1) == ']'))
      break;

    /* once something has matched we only keep walking to find the
       closing ']' */
    if(!success) {
      if(*p == '-' && p != first &&
         *(p + 1) != ']' && *(p + 1) != '\0') {
        /* compare as unsigned char so a range can never wrap around
           the way a signed char counter would */
        unsigned char lo = (unsigned char)*(p - 1);
        unsigned char hi = (unsigned char)*(p + 1);
        unsigned char ch = (unsigned char)*string;

        if(lo <= ch && ch <= hi)
          success = 1;
        p++; /* the range end has been consumed too */
      }
      else if(*p == *string)
        success = 1;
    }

    p++;
  }

  if(success != negated)
    return(p + 1);

  return(NULL);
}

int
glob(char *pattern, char *string, char **registers, int c_reg)
     /* Glob pattern against string; return 1 if the whole string
        matches, 0 if it doesn't.

        pattern    wildcard pattern: ? * [...] {a,b} (...) and \ escapes
        string     text to match
        registers  array with room for every register slot, or NULL.
                   When NULL, registers are disabled and parentheses
                   are simply skipped.
        c_reg      index of the register the next (...) group fills

        On success each closed (...) group has its text stored in
        registers[i] as a freshly allocated string (see copy_string).

        '*' tries the longest run first, so when several splits are
        possible the leftmost '*' swallows as much as it can.

        Neither '?' nor a [...] set ever matches the end of the string,
        and a trailing '\' in the pattern matches nothing.

        A {...} group ends at the first unescaped '}', so brace groups
        cannot be nested. */
{
  const int n_reg = (int)(sizeof(r_start) / sizeof(r_start[0]));
  int rv = 0;

  if(*pattern == '\0' && *string == '\0')
    rv = 1; /* both ran out at the same time, so everything before
               this point matched */

  else if(*pattern == '\\') {
    /* the escaped character must exist and match literally */
    if(*(pattern + 1) != '\0' && *(pattern + 1) == *string)
      rv = glob(pattern + 2, string + 1, registers, c_reg);
  }

  else if(*pattern == '(') {
    if(!registers)
      rv = glob(pattern + 1, string, registers, c_reg);
    else if(c_reg < 0 || c_reg >= n_reg)
      rv = 0; /* more groups than register slots */
    else if(r_start[c_reg]) {
#ifdef DEBUG
      fprintf(stderr, "glob: registers cannot be nested!\n");
#endif
      rv = 0;
    }
    else {
      /* we always know where the register starts, we just don't know
         where it ends */
      r_start[c_reg] = (caddr_t)string;
      rv = glob(pattern + 1, string, registers, c_reg);
      if(!rv)
        r_start[c_reg] = NULL; /* let a backtracking caller open this
                                  group again at another position */
    }
  }

  else if(*pattern == ')') {
    if(!registers)
      rv = glob(pattern + 1, string, registers, c_reg);
    /* a ')' without an open group in this slot cannot match */
    else if(c_reg >= 0 && c_reg < n_reg && r_start[c_reg] &&
            glob(pattern + 1, string, registers, c_reg + 1)) {
      rv = 1;

      /* the rest of the pattern matched, so the group really does end
         here */
      r_end[c_reg] = (caddr_t)string;
      registers[c_reg] = copy_string((char *)r_start[c_reg],
                                     (char *)r_end[c_reg]);
    }
  }

  else if(*pattern == '?') {
    if(*string != '\0')
      rv = glob(pattern + 1, string + 1, registers, c_reg);
  }

  else if(*pattern == '*') {
    while(*(pattern + 1) == '*')
      pattern++; /* eat up extra '*' */
    if(*(pattern + 1) == '\0') {
      rv = 1; /* a trailing '*' matches whatever is left */
    }
    else {
      /* try every split, longest first and including the empty
         remainder; stop at the first one that works so that each
         register is filled exactly once */
      char *tail = string + strlen(string);

      for(;;) {
        if(glob(pattern + 1, tail, registers, c_reg)) {
          rv = 1;
          break;
        }
        if(tail == string)
          break;
        tail--;
      }
    }
  }

  else if(*pattern == '[') {
    if(*string != '\0') {
      pattern = match_squares(pattern, string);
      if(pattern)
        rv = glob(pattern, string + 1, registers, c_reg);
    }
  }

  else if(*pattern == '{') {
    /* make up one pattern per alternative (alternative + whatever
       follows the '}') and match each in turn */
    size_t plen = strlen(pattern);
    char *copy = (char *)malloc(plen + 1); /* the alternatives */
    char *tmp = (char *)malloc(plen + 1);  /* big enough for any
                                              alternative + rest */
    char *brace_end = NULL;
    char *rest = NULL;
    char *alt = NULL;

    if(copy && tmp) {
      strcpy(copy, pattern + 1);
      brace_end = strechr(copy, '}');
    }

    if(brace_end) {
      *brace_end = '\0';
      /* copy starts one character into pattern, so the same offset
         plus two lands just after the '}' */
      rest = pattern + (brace_end - copy) + 2;

      /* split every alternative up front: stretok keeps its position
         in a static variable, which the recursive calls below would
         overwrite */
      if(stretok(copy, ","))
        while(stretok(NULL, ","))
          ;

      /* the alternatives now sit back to back in copy, each
         '\0'-terminated, the last one ending at brace_end */
      for(alt = copy; alt <= brace_end && !rv; alt += strlen(alt) + 1) {
        strcpy(tmp, alt);
        strcat(tmp, rest);
        rv = glob(tmp, string, registers, c_reg);
      }
    }

    free(tmp);
    free(copy);
  }

  /* pattern starts with an ordinary character: an exact textual match
     of the remainder counts as a match even if special characters
     follow */
  else if(strcmp(pattern, string) == 0)
    rv = 1;

  else if(*pattern == *string)
    rv = glob(pattern + 1, string + 1, registers, c_reg);

  return(rv);
}

char **
glob_string(char *pattern, char *string, int copy_stringp)
     /* The user-visible function: match string against pattern.

        copy_stringp != 0
            returns a freshly allocated register array on a match, with
            a copy of the whole string in slot 0; release it with
            free_glob_registers.  Returns NULL when there is no match
            or when the register array cannot be allocated.
        copy_stringp == 0
            registers are disabled; returns the non-NULL sentinel
            (char **)1 on a match and NULL otherwise.  The sentinel
            must not be dereferenced or freed. */
{
  char **registers = NULL;

  /* clear the capture bookkeeping on every call, not only when
     registers are requested, so that nothing left over from an
     earlier match can be mistaken for an open '(' group */
  memset(r_start, 0, sizeof(r_start));
  memset(r_end, 0, sizeof(r_end));

  if(copy_stringp) {
    registers = allocate_glob_registers();
    if(!registers)
      return(NULL);

    registers[0] = copy_string(string, string + strlen(string));
  }

  /* a lone "*" matches anything, no need to run the matcher */
  if(strcmp(pattern, "*") == 0 || glob(pattern, string, registers, 1)) {
    if(copy_stringp)
      return(registers);
    else
      return((char **)1);
  }

  free_glob_registers(registers);
  return(NULL);
}

char **
allocate_glob_registers()
     /* hand back a zero-filled array of MAX_REGISTERS string slots, or
        whatever calloc returns when it fails; pair it with
        free_glob_registers */
{
  return((char **)calloc(MAX_REGISTERS, sizeof(char *)));
}

void
free_glob_registers(char **registers)
     /* release every string held in a register array and then the
        array itself; a NULL array is quietly ignored */
{
  char **slot = NULL;

  if(!registers)
    return;

  /* empty slots are NULL, which free() accepts */
  for(slot = registers; slot < registers + MAX_REGISTERS; slot++)
    free(*slot);

  free(registers);
}

char *
copy_string(char *start, char *end)
     /* Allocate and return a '\0'-terminated copy of the text between
        start (inclusive) and end (exclusive).

        Returns NULL if either pointer is NULL, if end lies before
        start, or if the allocation fails.  The caller owns the result
        and releases it with free(). */
{
  char *rv = NULL;
  size_t length = 0;

  if(start == NULL || end == NULL || end < start)
    return(NULL);

  length = (size_t)(end - start);
  rv = (char *)malloc(length + 1);
  if(rv == NULL)
    return(NULL);

  /* strncpy stops reading at a '\0' inside the range and zero-fills
     the remainder; the terminator is always written explicitly */
  strncpy(rv, start, length);
  rv[length] = '\0';

  return(rv);
}
  

#ifdef GLOBTEST

  
int
main(int argc, char *argv[])
     /* test driver: glob argv[2] against the pattern in argv[1], say
        whether it matched and print every register that was filled.
        Exits with 1 when the two arguments are missing, 0 otherwise. */
{
  char **registers = NULL;
  int i = 0;

  if(argc < 3) {
    fprintf(stderr, "usage: %s pattern string\n",
            argc > 0 ? argv[0] : "glob");
    return(1);
  }

  if((registers = glob_string(argv[1], argv[2], 1)) != NULL)
    printf("%s matches %s!\n", argv[1], argv[2]);
  else
    printf("%s doesn't match %s\n", argv[1], argv[2]);

  for(i = 0; i < MAX_REGISTERS; i++) {
    if(registers && registers[i])
      printf("registers[%d] = %s\n", i, registers[i]);
  }

  free_glob_registers(registers);

  return(0);
}

#endif /* GLOBTEST */
