/* 
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 * 
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 * 
 * The Original Code is the Sablotron XSLT Processor.
 * 
 * The Initial Developer of the Original Code is Ginger Alliance Ltd.
 * Portions created by Ginger Alliance are Copyright (C) 2000 Ginger
 * Alliance Ltd. All Rights Reserved.
 * 
 * Contributor(s):
 * 
 * Alternatively, the contents of this file may be used under the
 * terms of the GNU General Public License Version 2 or later (the
 * "GPL"), in which case the provisions of the GPL are applicable 
 * instead of those above.  If you wish to allow use of your 
 * version of this file only under the terms of the GPL and not to
 * allow others to use your version of this file under the MPL,
 * indicate your decision by deleting the provisions above and
 * replace them with the notice and other provisions required by
 * the GPL.  If you do not delete the provisions above, a recipient
 * may use your version of this file under either the MPL or the
 * GPL.
 */

//  parser.cpp

#include "parser.h"
#include "situa.h"
#include "tree.h"
#include "error.h"
#include "proc.h"
#include "encoding.h"

// GP: clean

// 
//
//      TreeConstructer
//
//

// Callback table passed to OutputterObj::setOptionsSAX() by parseUsingSAX()
// and parseUsingSAXForAWhile(), so that events emitted by an outputter end
// up in this class's static tc* callbacks.
// The initializers are positional; presumably they must follow the member
// order of SAXHandler (declared elsewhere) - confirm before reordering.
SAXHandler TreeConstructer::myHandlerRecord =
{
    tcStartDocument,
    tcStartElement,
    tcEndElement,
    tcStartNamespace,
    tcEndNamespace,
    tcComment,
    tcPI,
    tcCharacters,
    tcEndDocument
};

// Creates a constructer bound to the given processor (must not be NULL,
// checked through NZ). No tree, data line, parser or outputter is attached
// yet; the parse* methods set those up.
TreeConstructer::TreeConstructer(Processor *theProcessor_)
    : theProcessor( NZ(theProcessor_) )
{
    // use the processor's situation object for this constructer as well
    theSituation = theProcessor_ -> situation;

    // nothing is being parsed so far
    theTree = NULL;
    theDataLine = NULL;
    theParser = NULL;
    theOutputter = NULL;
    theLineNumber = 0;
}

// Releases theOutputter through the project's cdelete helper (defined
// elsewhere; presumably delete-and-reset). The parser, tree and data line
// pointers are not touched here.
TreeConstructer::~TreeConstructer()
{
    cdelete(theOutputter);
}


// Parses data line 'd' into tree 't' with an expat parser supplied by the
// caller. The parser is only used here, never freed: that stays the
// caller's responsibility. After a successful parse of an XSL tree the
// root is stripped and the result is added to the tree's 'stripped' counter.
eFlag TreeConstructer::parseDataLineUsingGivenExpat(
    Tree *t, DataLine *d, XML_Parser theParser_)
{
    // remember what we are working on; the static callbacks read these
    theParser = theParser_;
    theDataLine = d;
    theTree = t;

    E( feedDocumentToParser(this) );
    E( t->parseFinished() );

    if (!(t->XSLTree))
        return OK;

    t->stripped += t->root.strip();
    return OK;
}


// Creates a namespace-aware expat parser, wires all tc* callbacks to it,
// parses data line 'd' into tree 't' and frees the parser again.
// Returns OK on success; on failure the error has been propagated through
// the E/M macros.
eFlag TreeConstructer::parseDataLineUsingExpat(
    Tree *t, DataLine *d)
{
    theParser = XML_ParserCreateNS(NULL, THE_NAMESPACE_SEPARATOR);
    M( theSituation, theParser );
    // XML_UseParserAsHandlerArg(parser);
    XML_SetElementHandler(theParser, 
        tcStartElement,
        tcEndElement);
    XML_SetCharacterDataHandler(theParser, 
        tcCharacters);
    XML_SetNamespaceDeclHandler(theParser, 
        tcStartNamespace, 
        tcEndNamespace);
    XML_SetCommentHandler(theParser,
        tcComment);
    XML_SetProcessingInstructionHandler(theParser,
        tcPI);
    // the unknown encoding handler is no longer used:
    // XML_SetUnknownEncodingHandler(theParser, 
    //     tcUnknownEncoding, 
    //     NULL);

    XML_SetExternalEntityRefHandler(theParser,
        tcExternalEntityRef);
    // the callbacks receive 'this' as their first argument
    XML_SetUserData(theParser, this);
    XML_SetBase(theParser, theProcessor -> findBaseURI( t -> name ));

    eFlag eCode = parseDataLineUsingGivenExpat(t, d, theParser);

    XML_ParserFree(theParser);
    // do not keep a dangling handle: getCurrentLineNumber() passes any
    // non-NULL theParser to expat
    theParser = NULL;
    E( eCode );
    return OK;
}

// Prepares this constructer to build tree 't' from SAX events instead of
// from a data line: a fresh outputter is created, pointed at the tc*
// callback table, and told to begin output.
eFlag TreeConstructer::parseUsingSAX(Tree *t)
{
    theDataLine = NULL;
    theTree = t;

    // GP: OK (theOutputter is a member)
    theOutputter = new OutputterObj(theProcessor);
    M( theSituation, theOutputter );

    // route the outputter's events to our callbacks
    E( theOutputter->setOptionsSAX(&myHandlerRecord, this) );
    E( theOutputter->eventBeginOutput() );
    return OK;
}

// Temporarily switches a data-line based constructer to SAX input: a new
// outputter feeding the tc* callbacks is created and pushed onto the
// processor. Undone by parseUsingSAXForAWhileDone().
// It's assumed that this is a tree constructer over a dataline
// (not a SAX one).
eFlag TreeConstructer::parseUsingSAXForAWhile()
{
    // there is deliberately no assert(!theOutputter) here, since this
    // method can be called recursively
    assert(theTree && theDataLine);

    // GP: OK (theOutputter is a member)
    theOutputter = new OutputterObj(theProcessor);
    M( theSituation, theOutputter );

    // route the outputter's events to our callbacks
    E( theOutputter->setOptionsSAX(&myHandlerRecord, this) );
    E( theProcessor->pushOutputter(theOutputter) );
    E( theOutputter->eventBeginOutput() );
    return OK;
}

// Counterpart of parseUsingSAXForAWhile(): ends the output of the current
// outputter, pops it from the processor and continues with whatever
// outputter the processor reports afterwards.
eFlag TreeConstructer::parseUsingSAXForAWhileDone()
{
    assert(theOutputter != NULL);
    E( theOutputter->eventEndOutput() );

    // popping also deletes the outputter
    E( theProcessor->popOutputter() );

    // this used to reset theOutputter to NULL; now the processor's current
    // outputter is picked up instead
    theOutputter = theProcessor->outputter();
    return OK;
}

// Returns the current line: the expat parser's line number when a parser
// is attached, the stored theLineNumber otherwise.
int TreeConstructer::getCurrentLineNumber()
{
    if (!theParser)
        return theLineNumber;
    return XML_GetCurrentLineNumber(theParser);
}
  
// Guesses the encoding of a document from the first bytes in 'buf' and
// stores its name in 'theEncoding'.
//   buf          start of the document; assumed null-terminated
//   theEncoding  receives "UTF-16", "ISO-10646-UCS-4", the value of the
//                encoding pseudo-attribute of the XML declaration, or
//                "UTF-8" as the default; left untouched when only a
//                W_BAD_START warning is issued
//   this_        constructer whose situation receives the warning
// Always returns OK.
/* static */
eFlag TreeConstructer::getDocEncoding(const char *buf, Str& theEncoding, TreeConstructer *this_)
{
    // the buffer viewed as 16-bit units, for BOM / wide '<' detection
    const unsigned short *units = (const unsigned short*) buf;

    switch (units[0])
    {
    case 0xfeff:
    case 0xfffe:
        // byte order mark, in either byte order
        theEncoding = "UTF-16";
        break;
    case 0x003c:
    case 0x3c00:
        // no byte order mark! a 16-bit '<' must be followed by a 16-bit '?'
        if (units[1] == 0x003f || units[1] == 0x3f00)
            theEncoding = "UTF-16";
        else
            Warn(this_ -> theSituation, W_BAD_START);
        break;
    case 0x0000:
        // two zero bytes followed by a 16-bit '<': 32-bit characters
        if (units[1] == 0x003c || units[1] == 0x3c00)
            theEncoding = "ISO-10646-UCS-4";
        else
            Warn(this_ -> theSituation, W_BAD_START);
        break;
    default:
        {
            if (buf[0] == '<' && buf[1] == '?' && buf[2] == 'x' && buf[3] == 'm')
            {
                // the first '=' is taken to belong to the version
                const char *p = strpbrk(buf + 2, "=?");
                if (p && *p == '=')
                {
                    // found '=' in version, search on
                    p = strpbrk(p + 1, "=?");
                    if (p && *p == '=')
                    {
                        // The second pseudo-attribute may be 'standalone'
                        // rather than 'encoding', so check the name in
                        // front of the '=' before trusting the value.
                        const char *nameEnd = p;
                        while (nameEnd > buf &&
                            (nameEnd[-1] == ' ' || nameEnd[-1] == '\t' ||
                             nameEnd[-1] == '\r' || nameEnd[-1] == '\n'))
                            nameEnd--;
                        if (nameEnd - buf >= 8 &&
                            !strncmp(nameEnd - 8, "encoding", 8))
                        {
                            p++;
                            skipWhite(p);
                            // only scan on if an opening quote is really
                            // there; this also keeps us from stepping
                            // over the terminating zero
                            if (*p == '\'' || *p == '\"')
                            {
                                const char *q = strpbrk(p + 1, "?\'\"");
                                // the closing quote must match the opening one
                                if (q && *q == *p)
                                {
                                    theEncoding.nset(p + 1, q - p - 1);
                                    break;
                                }
                            }
                        }
                    }
                }
            }
            // no usable encoding declaration
            theEncoding = "UTF-8";
        };
    }  
    return OK;
}


// Reads the constructer's data line block by block and hands the data to
// its expat parser, recoding it to UTF-8 first when expat cannot handle
// the document's encoding itself.
//   constructer  the TreeConstructer whose theParser / theDataLine are used
// Returns OK when the whole document has been parsed, NOT_OK otherwise.
// The parser is never freed here; whoever created it frees it.
/* static */
eFlag TreeConstructer::feedDocumentToParser(void* constructer)
{
    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    SituationObj &situation_ = *(this_ -> theSituation);
    char convBuffer[PARSE_CONV_BUFSIZE];
    char rawBuffer[PARSE_BUFSIZE + 1];
    char *outbuf = convBuffer;
    const char *inbuf = rawBuffer;
    rawBuffer[PARSE_BUFSIZE] = 0;

    Bool quit = FALSE, firstTime = TRUE;
    Bool mustConvert = FALSE, haveReadAll = FALSE;
    Processor *proc_ = this_ -> theProcessor;
    int res, bytes = 0;
    size_t inleft = 0, outleft = 0;
    EncResult convResult = ENC_OK;
    CDesc cd = (CDesc) -1;
    void *recoderUD;
    EncHandler *recoder = proc_ -> getEncHandler(&recoderUD);
    Str theEncoding;
    
    if (situation_.isError())
        return NOT_OK;
    XML_Parser parser = NZ( this_ -> theParser );

    situation_.pushCurrent();
    situation_.setCurrFile(this_ -> theDataLine -> fullUri);

    // GP: tree constructer does get killed on error
    // callers are Processor::parse (stack variable) and ext ent handler (disposes)

    // this is to add the 'xml' namespace declarations
    //
    tcStartNamespace(constructer, "xml", theXMLNamespace);

    while (!quit)
    {
//        char *buf = (char*) XML_GetBuffer(parser, PARSE_BUFSIZE + 1);
        // what to read depends on how the previous conversion ended
        switch (convResult)
        {
        case ENC_OK:
            {
                // just get the next block of data
                bytes = this_ -> theDataLine -> get(rawBuffer, PARSE_BUFSIZE); 
                haveReadAll = (bytes < PARSE_BUFSIZE);
                inbuf = rawBuffer;
                inleft = bytes;
            }; break;
        case ENC_EINVAL:
            {
                // buffer ended with an incomplete sequence
                // copy the sequence
                memmove(rawBuffer, inbuf, inleft);
                // get the rest from dataline
                bytes = this_ -> theDataLine -> get(rawBuffer + inleft, PARSE_BUFSIZE - inleft); 
                haveReadAll = (bytes < PARSE_BUFSIZE - (int) inleft);
                inbuf = rawBuffer;
                inleft += bytes;
            }; break;
        case ENC_E2BIG:
            {
                // the converted text was too big to fit in convBuffer
                // don't get more, just convert from input buffer
                // not changing haveReadAll, inbuf or inleft
            }; break;
        default:
            assert(!"feedDocumentToParser");
        }

        // the test for bytes==-1 is superfluous but just in case
        if (bytes == -1 || situation_.isError())
        {
            // read error, bailing out.
            // The parser must NOT be freed here: both callers
            // (parseDataLineUsingExpat and tcExternalEntityRef) free it
            // themselves after we return, so freeing it here as well
            // would release it twice.
            return NOT_OK;
        };

        if (firstTime)
        {
            // getDocEncoding() scans the buffer as a C string, so make
            // sure the text ends where the data read so far ends
            // (defensive: DataLine::get is not shown to terminate it)
            if (bytes >= 0 && bytes <= PARSE_BUFSIZE)
                rawBuffer[bytes] = 0;
            // find the document's encoding
            E( getDocEncoding(rawBuffer, theEncoding, this_) );
            // decide whether to recode or not
            if (proc_ -> theRecoder.handledByExpat(theEncoding))
                mustConvert = FALSE;
            else
            {
                // we feed expat UTF-8 produced by our own recoder
                mustConvert = TRUE;
                XML_SetEncoding(parser, "UTF-8");
                E( proc_ -> theRecoder.openToUTF8(theEncoding, cd) );
            }
        }
        
        if (mustConvert)
        {
            outleft = PARSE_CONV_BUFSIZE;
            outbuf = convBuffer;
            E( proc_ -> theRecoder.conv(cd,
                inbuf, inleft, outbuf, outleft, 
                convResult) );
            switch(convResult)
            {
            case ENC_OK:
                // everything converted; done if the input is exhausted
                quit = haveReadAll;
                break;
            case ENC_EINVAL:
            case ENC_E2BIG:
                // more input needed or more output pending
                quit = FALSE;
                break;
            case ENC_EILSEQ:
                Err1(proc_ -> situation, E1_BAD_CHAR, theEncoding);
                break;
            default:
                assert(!"bad convResult");
            };
            // number of bytes the recoder actually produced
            bytes = PARSE_CONV_BUFSIZE - outleft;
            res = XML_Parse(parser,convBuffer,bytes,quit);
        }
        else
        {
            quit = haveReadAll;
            res = XML_Parse(parser,rawBuffer,bytes,quit);
        }
        // an error may have been raised by one of the callbacks
        if (situation_.isError())
            return NOT_OK;
        if (!res) 
        {
            // parsing error to be reported
            // situation_.setCurrFile(t -> name);   is unnecessary as already set

            // hack to avoid an apparent bug in expat causing crashes when an UTF-8 text
            // happens to start with a byte order mark by mistake
            if (!(firstTime && rawBuffer[0] == (char) 0xEF && 
                rawBuffer[1] == (char) 0xBB && rawBuffer[2] == (char) 0xBF))
                situation_.setCurrLine(XML_GetCurrentLineNumber(parser));
            int code = XML_GetErrorCode(parser); 
            Str eCodeStr, eNameStr;
            eCodeStr = code;
            eNameStr = (char*) XML_ErrorString(code);
            // XML_ParserFree(parser); -- done later
            Err2(this_ -> theSituation, E_XML, eCodeStr, eNameStr);
        }
        firstTime = FALSE;
    }
    // remove the 'xml' namespace declarations
    //
    tcEndNamespace(constructer, "xml");

    situation_.popCurrent();
    return OK;
}

//
//  tcStartDocument
//  callback for the document start event
//

// Currently does nothing: the former body (error check and tree lookup)
// is commented out. The function is still referenced from myHandlerRecord.
/* static */
void TreeConstructer::tcStartDocument(
    void* constructer)
{
/*
    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    if (this_ -> theSituation -> isError())
        return;
    Tree *t = this_ -> theTree;
*/
};



//
//  tcStartElement
//  callback for the element start event
//

/* static */
void TreeConstructer::tcStartElement(
    void *constructer,const char *elName,const char **atts)
{
    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    Tree *t = this_ -> theTree;
    char **p = (char**) atts;
    XSL_OP opCode;
    XSL_ATT attCode;
    BOOL itsXSL = FALSE;
    Vertex *v;  // GP: OK (goes to tree)
    Attribute *a;   // GP: OK (goes to tree)
    QName q(this_ -> theProcessor);
    int elemLine;

    if (this_ -> theSituation -> isError())
        return;
    this_ -> theSituation -> setCurrLine(
        elemLine = this_ -> getCurrentLineNumber());
    if (setQNameFromExpat(this_, q, elName))
        return;
    if ((t -> XSLTree) && (q.uri == this_ -> theProcessor -> stdPhrase(PHRASE_XSL_NAMESPACE)))
    {
        itsXSL = TRUE;
        opCode = (XSL_OP) lookup((char*) q.getLocal(),xslOpNames);
        if (opCode == XSL_NONE)
        {
            this_ -> theSituation -> error(ET_BAD_XSL, 
					   "" /**theEmptyString*/, 
					   "" /**theEmptyString*/);
            return;
        };
        v = new(this_ -> theProcessor -> getArena()) XSLElement(q, t, opCode, this_ -> theProcessor);
    }
    else
        v = new(this_ -> theProcessor -> getArena()) Element(q, t, this_ -> theProcessor);
    v -> lineno = elemLine;
    t -> appendVertex(v);
    t -> pendingNS.giveCurrent(toE(v) -> namespaces, t);
    toE(v) -> name.findPrefix(toE(v) -> namespaces);
    
    while(*p)
    {
        if (setQNameFromExpat(this_, q, (char *)p[0]))
            return;
        q.findPrefix(toE(v) -> namespaces);
        attCode = (itsXSL ? 
            (XSL_ATT) lookup((char*)q.getLocal(),xslAttNames) : XSLA_NONE);
        a = new(this_ -> theProcessor -> getArena()) 
            Attribute(q,p[1],attCode,this_ -> theProcessor);
        a -> lineno = this_ -> getCurrentLineNumber();
        t -> appendVertex(a);
        p += 2;
    };
    
    if (itsXSL)
    {
        if (toX(v) -> checkAtts()) return; 
        // also check if a toplevel element does not have a non-stylesheet parent
        if (toX(v) -> checkToplevel()) return; 
    }
    else
    {
        if (t -> XSLTree)
        {
            int k, 
                kLimit = toE(v) -> atts.number();
            for (k = 0; k < kLimit; k++)
                if (toA(toE(v) -> atts[k]) -> buildExpr(
                    TRUE, EX_NONE)) return;
            // this calls situation.error() on error
        }
    }
}


//
//  tcEndElement
//  callback for the element end event
//

/* static */
void TreeConstructer::tcEndElement(
    void* constructer, const char* name)
{
    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    if (this_ -> theSituation -> isError())
        return;
    Tree *t = this_ -> theTree;

    Vertex *v = NZ( t -> stackTop );

    t -> flushPendingText();
    
    if (t -> XSLTree)
        t -> stripped += 
        (isXSLElement(v)? toX(v) : cast(Daddy*, v))
            -> strip();

    if(isXSLElement(v))
    {
        // situation.error() is called in the following
        if (toX(v) -> checkChildren())
            return;
    }
    if (this_ -> theProcessor -> processVertexAfterParse(v, t, this_))
        return;
    
    // t -> popVertex();    this is done in processVertexAfterParse
}


//
//  tcStartNamespace
//  callback for the namespace scope start event
//

/* static */
void TreeConstructer::tcStartNamespace(
    void* constructer, const char* prefix, const char* uri)
{
    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    if (this_ -> theSituation -> isError())
        return;
    Tree *t = this_ -> theTree;

    Vertex *newv;   // GP: OK (goes to tree)

    Phrase prefixPh, uriPh;
    if (prefix)
        this_ -> theProcessor -> dict().insert(prefix, prefixPh);
    else
        prefixPh = UNDEF_PHRASE;
    this_ -> theProcessor -> dict().insert(uri, uriPh);

    t -> pendingNS.appendAndSetOrdinal(
        newv = new(this_ -> theProcessor -> getArena()) NmSpace(
            prefixPh, uriPh, this_ -> theProcessor));
    newv -> lineno = this_ -> getCurrentLineNumber();
    
    // warn on obsolete namespace
    if (uri && !strcmp(oldXSLTNamespace, uri)) /* _PH_ */
        Warn1(this_ -> theSituation, W1_OLD_NS_USED, (char*)uri)
    else
    {
        if (prefix && !strcmp(prefix, "xsl") && 
	      uri && strcmp(theXSLTNamespace, uri)) /* _PH_ */
	  Warn1(this_ -> theSituation, W1_XSL_NOT_XSL, (char*) uri);
    }
};



//
//  tcEndNamespace
//  callback for the namespace scope end event
//

/* static */
void TreeConstructer::tcEndNamespace(
    void* constructer, const char* prefix)
{
    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    if (this_ -> theSituation -> isError())
        return;
    Tree *t = this_ -> theTree;

#ifdef _DEBUG
    // hope this works
    Phrase prefixPh;
    if (prefix)
        prefixPh = this_ -> theProcessor -> dict().lookup(prefix);
    else
        prefixPh = UNDEF_PHRASE;
    assert(toNS(t -> pendingNS.last()) -> prefix == prefixPh);
#endif
    t -> pendingNS.freelast(FALSE);
};


//
//  tcComment
//  callback for the comment event
//

/* static */
void TreeConstructer::tcComment(
    void* constructer, const char* contents)
{
    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    if (this_ -> theSituation -> isError())
        return;
    Tree *t = this_ -> theTree;

    if (t -> XSLTree)
        return;

    Comment *newNode;   // GP: OK
    newNode = new(this_ -> theProcessor -> getArena()) Comment(contents, this_ -> theProcessor);
    newNode -> lineno = this_ -> getCurrentLineNumber();
    t -> appendVertex(newNode);
};


//
//  tcPI
//  callback for the processing instruction event
//

/* static */
void TreeConstructer::tcPI(
    void* constructer, const char* target, const char* contents)
{
    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    if (this_ -> theSituation -> isError())
        return;
    Tree *t = this_ -> theTree;

    if (t -> XSLTree)
        return;

    ProcInstr *newNode; // GP: OK
    Phrase targetPh;
    this_ -> theProcessor -> dict().insert(target, targetPh);

    newNode = new(this_ -> theProcessor -> getArena())
        ProcInstr(targetPh, contents, this_ -> theProcessor);
    newNode -> lineno = this_ -> getCurrentLineNumber();
    t -> appendVertex(newNode);
};


//
//  tcCharacters
//  callback for the character data ("text") event
//

/* static */
void TreeConstructer::tcCharacters(
    void* constructer, const char* contents, int length)
{
    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    if (this_ -> theSituation -> isError())
        return;
    Tree *t = this_ -> theTree;

    Vertex *newVertex;  // GP: OK
    if (!!(newVertex = t -> appendText((char *) contents, length)))
        newVertex -> lineno = this_ -> getCurrentLineNumber();
};



//
//  tcEndDocument
//  callback for the document end event
//

// Currently does nothing: the former body (error check and tree lookup)
// is commented out. The function is still referenced from myHandlerRecord.
/* static */
void TreeConstructer::tcEndDocument(
    void* constructer)
{
/*
    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    if (this_ -> theSituation -> isError())
        return;
    Tree *t = this_ -> theTree;
*/
};


//
//  tcUnknownEncoding
//  callback for the unknown encoding event (expat)
//  needs to have "enc1250.h" included
//


// the unknown encoding handler is no longer used:
/* static */
/*
int TreeConstructer::tcUnknownEncoding(
    void *encodingHandlerData, const char *name, XML_Encoding *info)
{
    int *theTable;
    if (strEqNoCase((char*) name,"windows-1250"))
        theTable = Enc1250;
    else if (strEqNoCase((char*) name,"iso-8859-2"))
        theTable = EncLatin2;
    else
        return 0;
    int i;
    for (i = 0; i < 0x80; i++)
    {
        info -> map[i] = i;
        info -> map[i + 0x80] = theTable[i];
    }
    info -> map[0x7f] = -1;
    info -> data = NULL;
    info -> convert = NULL;
    info -> release = NULL;
    return 1;
};
*/

//
//  tcExternalEntityRef
//  callback for the external entity reference event (expat)
//

// Parses an external entity into the tree currently under construction:
// a sub-parser is created for the entity, the entity's system identifier
// is resolved against 'base', a data line is obtained for it from the
// processor and a temporary TreeConstructer parses it.
// Returns 1 on success and 0 on any failure (which is how expat expects
// an external entity handler to signal an error). The sub-parser is freed
// on every path.
/* static */
int TreeConstructer::tcExternalEntityRef(
    XML_Parser parser, const char* context, const char* base,
    const char* systemId, const char* publicId)
{
    TreeConstructer *this_ =
        (TreeConstructer*) XML_GetUserData(parser);
    if (this_ -> theSituation -> isError())
        return 0;
    Tree *t = this_ -> theTree;

    SituationObj* situation = this_ -> theSituation;

    Log1(situation, L1_READING_EXT_ENTITY, systemId);    
    XML_Parser newParser =
        XML_ExternalEntityParserCreate(parser, context, /* encoding= */ NULL);
    if (!newParser)
        return 0;

    // The E() macro must not be used from here on: it returns NOT_OK,
    // which is non-zero and would therefore be read by expat as success,
    // and it would also skip freeing newParser.
    DataLine *newDL = NULL;
    Str absolute;
    if (makeAbsoluteURI(systemId, base, absolute) ||
        this_ -> theProcessor -> addLineNoTree(newDL, absolute, t -> XSLTree) ||
        !newDL)
    {
        XML_ParserFree(newParser);
        return 0;
    }

    // A temporary constructer on the stack is enough; this removes the
    // allocation-failure path together with its early return.
    TreeConstructer newTC(this_ -> theProcessor);
    eFlag code = newTC.parseDataLineUsingGivenExpat(t, newDL, newParser);
    XML_ParserFree(newParser);
    return code ? 0 : 1;
}

// Fills 'qname_' from a name as delivered by expat.
// With namespace processing on, expat delivers names in a namespace as
// URI + THE_NAMESPACE_SEPARATOR + local name; such names are split into
// URI and local part. Names without the separator get an undefined URI;
// if they still contain a colon, only the prefix 'xml' is accepted (see
// the expat workaround below).
// Returns OK, or NOT_OK (through Err1/E) on an extra colon, an
// unexpected prefix or a dictionary failure.
// 'text' is temporarily modified in place while being split, but is
// always restored before the function proceeds.
/* static */
eFlag TreeConstructer::setQNameFromExpat(TreeConstructer* this_, QName& qname_, const char* text)
{
    char *p = (char*) text,
        *q = strchr(p, THE_NAMESPACE_SEPARATOR);
    if (q)
    {
        // cut the string at the separator just long enough to read the URI
        *q = 0;
        qname_.setUri(p);
        *q = NS_SEP;
        qname_.setLocal(q+1);
        if (strchr(q+1,':'))
        {
            // a colon must not appear in the local part
            DStr msg = "{";
            msg += qname_.getUri();
            msg += "}:";
            msg += qname_.getLocal();
            Err1(this_ -> theProcessor -> situation, E1_EXTRA_COLON, (char *)msg);
        }
    }
    else
    {
        qname_.uri = UNDEF_PHRASE;
        qname_.setLocal(p);
        char *isColon;
        qname_.prefix = UNDEF_PHRASE;
        if (!!(isColon = strchr(p,':')))
        {
            // fixing what appears a bug in expat - sometimes the xml:lang attr arrives unexpanded
            // apparently only in XSL and when it's not at the top level

            // Cut the string at the colon only while the prefix is being
            // examined, then put the colon back: the buffer belongs to
            // expat and must not be left truncated.
            *isColon = 0;
            const Bool prefixIsXml = strEqNoCase(p,"xml");
            Str badPrefix;
            if (!prefixIsXml)
                badPrefix = p;      // keep a copy for the error message
            *isColon = ':';

            if (!prefixIsXml)
            {
                Err1(this_ -> theProcessor -> situation, ET_BAD_PREFIX, badPrefix);
            }
            else
            {
                qname_.setLocal(isColon + 1);
                qname_.uri = this_ -> theProcessor -> stdPhrase(PHRASE_XML_NAMESPACE);
                E( this_ -> theProcessor -> dict().insert("xml",qname_.prefix) );                
            }
        }
    };
    return OK;
}

// Returns the outputter currently attached to this constructer (NULL if
// none has been set).
OutputterObj *TreeConstructer::getOutputter()
{
    return this -> theOutputter;
}
