/*
 *  Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
 */

/*  This file is part of Ragel.
 *
 *  Ragel is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 * 
 *  Ragel is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with Ragel; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 */

#ifndef _PARSETREE_H
#define _PARSETREE_H

#include "avlmap.h"
#include "bstmap.h"
#include "vector.h"
#include "dlist.h"
#include "rlparse.h"
#include "parsedata.h"

/*
 * A Variable Definition
 */
struct VarDef
{
	VarDef( char *name, JoinOrLm *joinOrLm )
		: name(name), joinOrLm(joinOrLm) { }
	
	/* Parse tree traversal. */
	FsmAp *walk( ParseData *pd );
	void makeNameTree( const InputLoc &loc, ParseData *pd );
	void resolveNameRefs( ParseData *pd );

	char *name;
	JoinOrLm *joinOrLm;
};


/*
 * LongestMatch
 *
 * Wherever possible the item match will execute on the character. If not
 * possible the item match will execute on a lookahead character and either
 * hold the current char (if one away) or backup.
 *
 * How to handle the problem of backing up over a buffer break?
 * 
 * Don't want to use pending out transitions for embedding item match because
 * the role of item match action is different: it may sometimes match on the
 * final transition, or may match on a lookahead character.
 *
 * Don't want to invent a new operator just for this. So just trail action
 * after machine, this means we can only use literal actions.
 *
 * The item action may 
 *
 * What states of the machine will be final. The item actions that wrap around
 * on the last character will go straight to the start state.
 *
 * Some transitions will be lookahead transitions, they will hold the current
 * character. Crossing them with regular transitions must be restricted
 * because it does not make sense. The transition cannot simultaneously hold
 * and consume the current character.
 */
struct LongestMatchPart
{
	LongestMatchPart( Join *join, Action *action, int longestMatchId ) : 
		join(join), action(action), longestMatchId(longestMatchId),
		inLmSelect(false) { }

	InputLoc getLoc();
	
	Join *join;
	Action *action;
	Action *setActId;
	Action *actOnLast;
	Action *actOnNext;
	Action *actLagBehind;
	int longestMatchId;
	bool inLmSelect;
	LongestMatch *longestMatch;

	LongestMatchPart *prev, *next;
};

/* Declare a new type so that ptreetypes.h need not include dlist.h. */
struct LmPartList : DList<LongestMatchPart> {};

struct LongestMatch
{
	/* Construct with a list of joins */
	LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) : 
		loc(loc), longestMatchList(longestMatchList), name(0), 
		lmSwitchHandlesError(false) { }

	/* Tree traversal. */
	FsmAp *walk( ParseData *pd );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );
	void runLonestMatch( ParseData *pd, FsmAp *graph );
	Action *newAction( ParseData *pd, const InputLoc &loc, char *name, 
			InlineList *inlineList );
	void makeActions( ParseData *pd );
	void findName( ParseData *pd );
	void restart( FsmAp *graph, TransAp *trans );

	InputLoc loc;
	LmPartList *longestMatchList;
	char *name;

	Action *lmActSelect;
	bool lmSwitchHandlesError;

	LongestMatch *next, *prev;
};


/* List of Expressions. */
typedef DList<Expression> ExprList;

struct JoinOrLm
{
	enum Type {
		JoinType,
		LongestMatchType
	};

	JoinOrLm( Join *join ) : 
		join(join), type(JoinType) {}
	JoinOrLm( LongestMatch *longestMatch ) :
		longestMatch(longestMatch), type(LongestMatchType) {}

	FsmAp *walk( ParseData *pd );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );
	
	Join *join;
	LongestMatch *longestMatch;
	Type type;
};

/*
 * Join
 */
struct Join
{
	/* Construct with the first expression. */
	Join( Expression *expr );
	Join( const InputLoc &loc, Expression *expr );

	/* Tree traversal. */
	FsmAp *walk( ParseData *pd );
	FsmAp *walkJoin( ParseData *pd );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );

	/* Data. */
	InputLoc loc;
	ExprList exprList;
};

/*
 * Expression
 */
struct Expression
{
	enum Type { 
		OrType,
		IntersectType, 
		SubtractType, 
		StrongSubtractType,
		TermType, 
		BuiltinType
	};

	/* Construct with an expression on the left and a term on the right. */
	Expression( Expression *expression, Term *term, Type type ) : 
		expression(expression), term(term), 
		builtin(builtin), type(type), prev(this), next(this) { }

	/* Construct with only a term. */
	Expression( Term *term ) : 
		expression(0), term(term), builtin(builtin), 
		type(TermType) , prev(this), next(this) { }
	
	/* Construct with a builtin type. */
	Expression( BuiltinMachine builtin ) : 
		expression(0), term(0), builtin(builtin), 
		type(BuiltinType), prev(this), next(this) { }

	~Expression();

	/* Tree traversal. */
	FsmAp *walk( ParseData *pd, bool lastInSeq = true );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );

	/* Node data. */
	Expression *expression;
	Term *term;
	BuiltinMachine builtin;
	Type type;

	Expression *prev, *next;
};

/*
 * Term
 */
struct Term 
{
	enum Type { 
		ConcatType, 
		RightStartType,
		RightFinishType,
		LeftType,
		FactorWithAugType
	};

	Term( Term *term, FactorWithAug *factorWithAug ) :
		term(term), factorWithAug(factorWithAug), type(ConcatType) { }

	Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
		term(term), factorWithAug(factorWithAug), type(type) { }

	Term( FactorWithAug *factorWithAug ) :
		term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { }
	
	~Term();

	FsmAp *walk( ParseData *pd, bool lastInSeq = true );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );

	Term *term;
	FactorWithAug *factorWithAug;
	Type type;

	/* Priority descriptor for RightFinish type. */
	PriorDesc priorDescs[2];
};

/* Structure for storing location of epsilon transitons. */
struct EpsilonLink
{
	EpsilonLink( const InputLoc &loc, NameRef &target )
		: loc(loc), target(target) { }

	InputLoc loc;
	NameRef target;
};

struct Label
{
	Label( const InputLoc &loc, char *data )
		: loc(loc), data(data) { }

	InputLoc loc;
	char *data;
};

/* Third level of precedence. Augmenting nodes with actions and priorities. */
struct FactorWithAug
{
	FactorWithAug( FactorWithRep *factorWithRep ) :
		priorDescs(0), factorWithRep(factorWithRep) { }
	~FactorWithAug();

	/* Tree traversal. */
	FsmAp *walk( ParseData *pd );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );

	void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd );
	void assignPriorities( FsmAp *graph, int *priorOrd );

	void assignConditions( FsmAp *graph );

	/* Actions and priorities assigned to the factor node. */
	Vector<ParserAction> actions;
	Vector<PriorityAug> priorityAugs;
	PriorDesc *priorDescs;
	Vector<Label> labels;
	Vector<EpsilonLink> epsilonLinks;
	Vector<ParserAction> conditions;

	FactorWithRep *factorWithRep;
};

/* Fourth level of precedence. Trailing unary operators. Provide kleen star,
 * optional and plus. */
struct FactorWithRep
{
	enum Type { 
		StarType,
		StarStarType,
		OptionalType,
		PlusType, 
		ExactType,
		MaxType,
		MinType,
		RangeType,
		FactorWithNegType
	};

	 FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep, 
			int lowerRep, int upperRep, Type type ) :
		loc(loc), factorWithRep(factorWithRep), 
		factorWithNeg(0), lowerRep(lowerRep), 
		upperRep(upperRep), type(type) { }
	
	FactorWithRep( const InputLoc &loc, FactorWithNeg *factorWithNeg )
		: loc(loc), factorWithNeg(factorWithNeg), type(FactorWithNegType) { }

	~FactorWithRep();

	/* Tree traversal. */
	FsmAp *walk( ParseData *pd );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );

	InputLoc loc;
	FactorWithRep *factorWithRep;
	FactorWithNeg *factorWithNeg;
	int lowerRep, upperRep;
	Type type;

	/* Priority descriptor for StarStar type. */
	PriorDesc priorDescs[2];
};

/* Fifth level of precedence. Provides Negation. */
struct FactorWithNeg
{
	enum Type { 
		NegateType, 
		CharNegateType,
		FactorType
	};

	FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
		loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }

	FactorWithNeg( const InputLoc &loc, Factor *factor ) :
		loc(loc), factorWithNeg(0), factor(factor), type(FactorType) { }

	~FactorWithNeg();

	/* Tree traversal. */
	FsmAp *walk( ParseData *pd );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );

	InputLoc loc;
	FactorWithNeg *factorWithNeg;
	Factor *factor;
	Type type;
};

/*
 * Factor
 */
struct Factor
{
	/* Language elements a factor node can be. */
	enum Type {
		LiteralType, 
		RangeType, 
		OrExprType,
		RegExprType, 
		ReferenceType,
		ParenType,
		LongestMatchType,
	}; 

	/* Construct with a literal fsm. */
	Factor( Literal *literal ) :
		literal(literal), type(LiteralType) { }

	/* Construct with a range. */
	Factor( Range *range ) : 
		range(range), type(RangeType) { }
	
	/* Construct with the or part of a regular expression. */
	Factor( ReItem *reItem ) :
		reItem(reItem), type(OrExprType) { }

	/* Construct with a regular expression. */
	Factor( RegExpr *regExp ) :
		regExp(regExp), type(RegExprType) { }

	/* Construct with a reference to a var def. */
	Factor( const InputLoc &loc, VarDef *varDef ) :
		loc(loc), varDef(varDef), type(ReferenceType) {}

	/* Construct with a parenthesized join. */
	Factor( Join *join ) :
		join(join), type(ParenType) {}
	
	/* Construct with a longest match operator. */
	Factor( LongestMatch *longestMatch ) :
		longestMatch(longestMatch), type(LongestMatchType) {}

	/* Cleanup. */
	~Factor();

	/* Tree traversal. */
	FsmAp *walk( ParseData *pd );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );

	InputLoc loc;
	Literal *literal;
	Range *range;
	ReItem *reItem;
	RegExpr *regExp;
	VarDef *varDef;
	Join *join;
	LongestMatch *longestMatch;
	int lower, upper;
	Type type;
};

/* A range machine. Only ever composed of two literals. */
struct Range
{
	Range( Literal *lowerLit, Literal *upperLit ) 
		: lowerLit(lowerLit), upperLit(upperLit) { }

	~Range();
	FsmAp *walk( ParseData *pd );

	Literal *lowerLit;
	Literal *upperLit;
};

/* Some literal machine. Can be a number or literal string. */
struct Literal
{
	enum LiteralType { Number, LitString };

	Literal( const InputLoc &loc, TokenData data, LiteralType type )
		: loc(loc), data(data), type(type), caseInsensitive(false) { }

	Key makeRangeEndPoint( ParseData *pd );
	FsmAp *walk( ParseData *pd );
	
	InputLoc loc;
	TokenData data;
	LiteralType type;
	bool caseInsensitive;
};

/* Regular expression. */
struct RegExpr
{
	enum RegExpType { RecurseItem, Empty };

	/* Constructors. */
	RegExpr() : 
		type(Empty), caseInsensitive(false) { }
	RegExpr(RegExpr *regExp, ReItem *item) : 
		regExp(regExp), item(item), 
		type(RecurseItem), caseInsensitive(false) { }

	~RegExpr();
	FsmAp *walk( ParseData *pd, RegExpr *rootRegex );

	RegExpr *regExp;
	ReItem *item;
	RegExpType type;
	bool caseInsensitive;
};

/* An item in a regular expression. */
struct ReItem
{
	enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
	
	ReItem( const InputLoc &loc, char c ) 
		: loc(loc), star(false), type(Data)
	{
		data.init( c );
	}
	ReItem( const InputLoc &loc, ReItemType type )
		: loc(loc), star(false), type(type) { }
	ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
		: loc(loc), orBlock(orBlock), star(false), type(type) { }

	~ReItem();
	FsmAp *walk( ParseData *pd, RegExpr *rootRegex );

	InputLoc loc;
	TokenData data;
	ReOrBlock *orBlock;
	bool star;
	ReItemType type;
};

/* An or block item. */
struct ReOrBlock
{
	enum ReOrBlockType { RecurseItem, Empty };

	/* Constructors. */
	ReOrBlock()
		: type(Empty) { }
	ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
		: orBlock(orBlock), item(item), type(RecurseItem) { }

	~ReOrBlock();
	FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
	
	ReOrBlock *orBlock;
	ReOrItem *item;
	ReOrBlockType type;
};

/* An item in an or block. */
struct ReOrItem
{
	enum ReOrItemType { Data, Range };

	ReOrItem( const InputLoc &loc, char c ) 
		: loc(loc), type(Data)
	{
		data.init( c );
	}
	ReOrItem( const InputLoc &loc, char lower, char upper )
		: loc(loc), lower(lower), upper(upper), type(Range) { }

	FsmAp *walk( ParseData *pd, RegExpr *rootRegex );

	InputLoc loc;
	TokenData data;
	char lower;
	char upper;
	ReOrItemType type;
};


/*
 * Inline code tree
 */
struct InlineList;
struct InlineItem
{
	enum Type 
	{
		Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, PChar,
		Char, Hold, Curs, Targs, Entry, Exec, LmSwitch, LmSetActId,
		LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, LmInitAct,
		LmInitTokStart, LmSetTokStart, Break
	};

	InlineItem( const InputLoc &loc, char *data, Type type ) : 
		loc(loc), data(data), nameRef(0), children(0), type(type) { }

	InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) : 
		loc(loc), data(0), nameRef(nameRef), children(0), type(type) { }

	InlineItem( const InputLoc &loc, LongestMatch *longestMatch, 
		LongestMatchPart *longestMatchPart, Type type ) : loc(loc), data(0),
		nameRef(0), children(0), longestMatch(longestMatch),
		longestMatchPart(longestMatchPart), type(type) { } 

	InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) : 
		loc(loc), data(0), nameRef(0), nameTarg(nameTarg), children(0),
		type(type) { }

	InlineItem( const InputLoc &loc, Type type ) : 
		loc(loc), data(0), nameRef(0), children(0), type(type) { }
	
	InputLoc loc;
	char *data;
	NameRef *nameRef;
	NameInst *nameTarg;
	InlineList *children;
	LongestMatch *longestMatch;
	LongestMatchPart *longestMatchPart;
	Type type;

	InlineItem *prev, *next;
};

/* Normally this would be atypedef, but that would entail including DList from
 * ptreetypes, which should be just typedef forwards. */
struct InlineList : public DList<InlineItem> { };

#endif /* _PARSETREE_H */
