/*
 * This file is part of Magellan <http://www.kAlliance.org/Magellan>
 *
 * Copyright (c) 1998-2000 Teodor Mihai <teddy@ireland.com>
 * Copyright (c) 1998-2000 Laur Ivan <laur.ivan@ul.ie>
 * Copyright (c) 1999-2000 Virgil Palanciuc <vv@ulise.cs.pub.ro>
 *
 * Requires the Qt widget libraries, available at no cost at
 * http://www.troll.no/
 *
 * Also requires the KDE libraries, available at no cost at
 * http://www.kde.org/
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 
 * copies of the Software, and to permit persons to whom the Software is 
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in 
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 
 * IN THE SOFTWARE.
 */

#include <qstring.h>
#include <qregexp.h>
#include <htmlprocessing.h>
#include <qstringlist.h>
#include <countriesclass.h>
#include <ctype.h>
#include <stdio.h>

//#define DEBUG_HP
#define IDSTRING "HtmlProcessor: "

// Character sets used by the scanning helpers of HtmlProcessor. Each
// constant is simply the list of characters that belong to the set.
//
// NOTE(review): every set except NUMBER_CHAR ends in an explicit "\0" and
// the sets that contain '%' spell it "%%". These are ordinary string
// literals (not printf formats), so "%%" is two percent characters; both
// quirks are reproduced unchanged here.

// Letters, digits, '_' and '-'.
QString HtmlProcessor::VALID_ID_CHAR=
	"abcdefghijklmnopqrstuvwxyz"
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"0123456789"
	"_-\0";

// % is from %20 (space) & comp
// # is from anchors
// ~ is from home dirs
QString HtmlProcessor::VALID_NAME_CHAR=
	"abcdefghijklmnopqrstuvwxyz"
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"0123456789"
	"_-.%%#~\0";

// Same as VALID_NAME_CHAR plus '/'.
// % is from %20 (space) & comp
// # is from anchors
// ~ is from home dirs
QString HtmlProcessor::VALID_NAME_MAIL_CHAR=
	"abcdefghijklmnopqrstuvwxyz"
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"0123456789"
	"_-./%%#~\0";

// Extended set: additionally accepts '@', '/', '?', '!', '+' and '='.
// % is from %20 (space) & comp
// # is from anchors
// ~ is from home dirs
QString HtmlProcessor::EXT_NAME_CHAR=
	"abcdefghijklmnopqrstuvwxyz"
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"0123456789"
	"_-.%%@/#~?!+=\0";

// Decimal digits only.
QString HtmlProcessor::NUMBER_CHAR="0123456789";

// The single shared instance; stays 0 until thisInstance() creates it.
HtmlProcessor *HtmlProcessor::hpInst=0;


// Returns the shared HtmlProcessor, allocating it on the first call.
// Nothing in this function releases the object again.
HtmlProcessor *HtmlProcessor::thisInstance()
{
	if(hpInst)
		return hpInst;

	hpInst=new HtmlProcessor;
	return hpInst;
}

// Builds the two parallel lookup lists: text[i] is a plain-text token and
// html[i] is what replaces it in HTML. The order matters, because other
// methods address both lists with the same index.
HtmlProcessor::HtmlProcessor()
{
	// NOTE(review): the text side of "&pound;" and "&not;" is an empty
	// string. Presumably the original characters were lost in an encoding
	// conversion -- confirm against the upstream file.
	static const char *const pairs[][2]=
	{
		{ "\t", "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;" },
		{ " ",  "&nbsp;" },
		{ "<",  "&lt;" },
		{ ">",  "&gt;" },
		{ "&",  "&amp;" },
		{ "\"", "&quot;" },
		{ "",   "&pound;" },
		{ "|",  "&brvbar;" },
		{ "",   "&not;" },
		{ "\n", "<br>\n" }
	};
	const int pairCount=sizeof(pairs)/sizeof(pairs[0]);

	// init the text & html
	text.clear();
	html.clear();
	for(int k=0;k<pairCount;k++)
	{
		text.append(pairs[k][0]);
		html.append(pairs[k][1]);
	}
}

// Tells whether position pos of buffer is considered to lie inside a tag.
//
// The nearest '<' and '>' are located backwards from pos and forwards from
// pos (both searches start at pos itself). The position counts as being
// inside a tag when both of these hold:
//   - looking back: at least one bracket exists and either only one kind
//     exists or the '<' is nearer than the '>';
//   - looking ahead: at least one bracket exists and either only one kind
//     exists or the '>' comes before the '<'.
bool HtmlProcessor::inHtml(QString &buffer, int pos)
{
	const int prevGt=buffer.findRev(">",pos);
	const int prevLt=buffer.findRev("<",pos);
	const int nextGt=buffer.find(">",pos);
	const int nextLt=buffer.find("<",pos);

	// State of the text before pos.
	bool startTag;
	if(prevLt==-1 && prevGt==-1)
		startTag=false;            // no bracket at all
	else if(prevLt==-1 || prevGt==-1)
		startTag=true;             // only one kind of bracket present
	else
		startTag=(prevLt>prevGt);  // the last bracket is an opening one

	// State of the text from pos onwards.
	bool endTag;
	if(nextLt==-1 && nextGt==-1)
		endTag=false;
	else if(nextLt==-1 || nextGt==-1)
		endTag=true;
	else
		endTag=(nextGt<nextLt);    // the next bracket is a closing one

#ifdef DEBUG_HP
	printf("prev:(%d, %d) after:(%d %d): %d %d\n", prevLt, prevGt, nextLt, nextGt, startTag, endTag);
#endif
	// check if it's in a tag
	if(!(startTag && endTag))
		return false;
#ifdef DEBUG_HP
	printf(" In a tag\n");
#endif
	return true;
}

// Validates the candidate link buffer[start, end) and narrows it.
//
//   buffer  text being scanned (not modified here)
//   start   in/out: first index of the candidate; set to end when the
//           candidate is rejected, so that start<end means "keep it"
//   end     in/out: one past the last index; may be moved left
//   urlLen  number of leading characters (the matched prefix) to skip
//           before the host name begins
//   isMail  when true the candidate is always cut right after the domain
//           label and no path is examined
//
// The host is the text from start+urlLen up to the first delimiter matched
// by the regular expression below (or up to end if there is none). It is
// split on '.', and the last label accepted by CountriesClass::isDomain()
// is taken as the domain. Without such a label the candidate is rejected.
//
// NOTE(review): QStringList::split() is called without asking for empty
// entries, so a host with consecutive dots presumably yields offsets that
// are short by the number of dropped labels -- confirm.
void HtmlProcessor::domainCheck(QString &buffer, int &start, int &end, int urlLen, bool isMail)
{
	const int hostStart=start+urlLen;
	int ender=buffer.find(QRegExp("[/<\\[\\]>&\\s\\,]"),hostStart);
#ifdef DEBUG_HP
	printf("ender: %d %d [%s]\n", hostStart, ender, (const char *)buffer.mid(hostStart, ender-hostStart));
	printf("buffer: [%s]\n", (const char *)buffer.mid(start, end-start));
#endif
	if(ender==-1)
		ender=end;
	QString buf=buffer.mid(hostStart, ender-hostStart).lower();
#ifdef DEBUG_HP
	printf("ender: %d %d [%s]\n", hostStart, ender, (const char *)buf);
#endif
	QStringList parts=QStringList::split(".",buf);
	// Work with a signed count: comparing an int against count()-1 wraps
	// around when the list is empty.
	const int partCount=(int)parts.count();
#ifdef DEBUG_HP
	printf("Domain check: %d parts:[%s]\n",partCount, (const char *)buf);
	for(int p=0;p<partCount;p++)
		printf("\t[%s]\n",(const char *)parts[p]);
#endif

	// Last label that is a known domain, or -1.
	int dom=-1;
	for(int i=partCount-1;i>=0;i--)
		if(CountriesClass::thisInstance()->isDomain(parts[i], CountriesClass::AllUnique))
		{
			dom=i;
			break;
		}

	if(dom==-1)
	{
		start=end; // don't quote anything coz is not valid
		return;
	}

	// Index in buffer where the domain label starts: every label before it
	// contributes its own length plus one '.'.
	int domainStart=hostStart;
	for(int i=0;i<dom;i++)
		domainStart+=parts[i].length()+1;
	const int domainEnd=domainStart+parts[dom].length();

	if(isMail || dom!=partCount-1)
	{
		// Either no path is wanted or labels follow the domain: keep
		// everything up to and including the domain label.
		end=domainEnd;
		return;
	}

	// The domain is the last label: check for valid "slash things".
	// The first item is the domain label itself.
	QStringList itemsList=QStringList::split("/",buffer.mid(domainStart, end-domainStart));
	const int itemCount=(int)itemsList.count();
	int offset=0;
	for(int i=0;i<itemCount;i++)
	{
#ifdef DEBUG_HP
		printf("\t\tITEM: %d [%s]\n", i, (const char *)itemsList[i]);
#endif
		const QString &item=itemsList[i];
		bool valid=(item.length()==0);
		if(!valid)
		{
			const char c=((const char *)item)[0];
			// isalnum() requires a value representable as unsigned char.
			valid=isalnum((unsigned char)c) ||
					c=='-' || c=='_' || c=='~' || c=='.' || c=='#' || c=='@';
		}
		if(!valid)
		{
			// Cut the candidate where the first invalid item begins.
			end=domainStart+offset;
			return;
		}
		offset+=item.length()+1;
	}
}

// Wraps every occurrence of url (searched case-insensitively) in buffer
// into an <a href="..."> element, unless inHtml() reports that the
// occurrence lies inside a tag.
//
//   buffer  text to rewrite in place
//   url     prefix to look for
//   href    text put in front of the matched text to form the href value
//   strict  forwarded to domainCheck() as its isMail argument
//
// A candidate runs from the match up to the next space (or the end of the
// buffer) and is then narrowed or rejected by domainCheck().
void HtmlProcessor::replaceChunk(QString &buffer, QString url, QString href, bool strict)
{
	int limit=buffer.length();
	for(int cursor=0; cursor<limit; cursor++)
	{
		cursor=buffer.find(url,cursor,false);
		if(cursor==-1)
			break;

		// start processing/replacing the stuff...
		if(inHtml(buffer,cursor))
		{
#ifdef DEBUG_HP
			printf("It's in a tag. not quoting...\n");
#endif
			continue;
		}

		//quoting
		int stop=buffer.find(" ",cursor);
#ifdef DEBUG_HP
		printf("\t[%s]\n", (const char *)buffer.right(cursor));
		printf("\t%d %d %d\n", cursor, stop, buffer.length());
#endif
		if(stop==-1 || stop>limit)
			stop=limit;

		// May move cursor up to stop (rejected) or pull stop back.
		domainCheck(buffer, cursor, stop, url.length(), strict);
		if(cursor>=stop)
			continue;

		const QString matched=buffer.mid(cursor, stop-cursor);
#ifdef DEBUG_HP
		printf(" Replacing [%s]\n",(const char *)matched);
#endif
		QString _url=href+matched;
		removeTags(_url);
		QString replacer=QString("<a href=\"")+_url+"\">"+matched+"</a>";
		buffer.replace(cursor, stop-cursor, replacer);

		// Continue after the inserted element; the buffer has grown.
		cursor+=replacer.length();
		limit=buffer.length();
	}
}


// Turns occurrences of url in buffer into links, leaving existing anchors
// alone.
//
// The buffer is walked as a sequence of chunks: each chunk is the text
// between the end of one "</a>" (or the start of the buffer) and the next
// "<a href=" (or the end of the buffer). Every chunk is handed to
// replaceChunk() and written back if its length changed.
//
//   url     prefix to look for, passed on to replaceChunk()
//   buffer  text to rewrite in place
//   href    passed on to replaceChunk()
//   strict  passed on to replaceChunk()
void HtmlProcessor::replaceURLs(QString url, QString &buffer, QString href, bool strict)
{
	int pos=-1;
	int startChunk=0;
	int endChunk=0;
	QString b;
#ifdef DEBUG_HP
	printf(" Init buffer: <>%s<>\n",(const char *)buffer);
#endif
	while(pos!=(int)buffer.length())
	{
		pos=buffer.find("<a href=",pos+1,false);
		if(pos==-1)
			pos=buffer.length();
		endChunk=pos-1;
		b=buffer.mid(startChunk, endChunk-startChunk+1);
		const int len=b.length();
#ifdef DEBUG_HP
		printf("avalable buffer: [%s] %d %d\n",(const char *)b, startChunk, endChunk);
#endif
		replaceChunk(b, url, href, strict);
		if((int)b.length()!=len)
		{
			buffer.replace(startChunk, len, b);
			// The chunk grew: keep endChunk on its last character so that
			// the searches below do not land inside the links just added.
			endChunk+=(int)b.length()-len;
		}
		startChunk=buffer.find("</a>",endChunk, false);
		if(startChunk==-1)
			startChunk=buffer.length(); // unterminated anchor: nothing left to scan
		else
			startChunk+=4;              // skip the "</a>" itself
		pos=endChunk+1;
	}
#ifdef DEBUG_HP
	printf("Ready...\n");
#endif
}

// Turns e-mail addresses in buffer into mailto: links, leaving existing
// anchors alone.
//
// The buffer is walked as a sequence of chunks: each chunk is the text
// between the end of one "</a>" (or the start of the buffer) and the next
// "<a href=" (or the end of the buffer). Every chunk is handed to
// replaceEmailChunk() and written back if its length changed.
void HtmlProcessor::replaceEmails(QString &buffer)
{
	int pos=-1;
	int startChunk=0;
	int endChunk=0;
	QString b;
	while(pos!=(int)buffer.length())
	{
		pos=buffer.find("<a href=",pos+1,false);
		if(pos==-1)
			pos=buffer.length();
		endChunk=pos-1;
		b=buffer.mid(startChunk, endChunk-startChunk+1);
		const int len=b.length();
#ifdef DEBUG_HP
		printf("avalable buffer(mails): [%s]\n",(const char *)b);
#endif
		replaceEmailChunk(b);
		if((int)b.length()!=len)
		{
			buffer.replace(startChunk, len, b);
			// The chunk grew: keep endChunk on its last character so that
			// the searches below do not land inside the links just added.
			endChunk+=(int)b.length()-len;
		}
		startChunk=buffer.find("</a>",endChunk, false);
		if(startChunk==-1)
			startChunk=buffer.length(); // unterminated anchor: nothing left to scan
		else
			startChunk+=4;              // skip the "</a>" itself
		pos=endChunk+1;
	}
#ifdef DEBUG_HP
	printf("Ready...\n");
#endif
}

// Wraps every e-mail address found in buffer into
// <a href="mailto:...">...</a>.
//
// For each '@' (the search starts at index 1, so an '@' at index 0 is
// ignored) the address is delimited as follows:
//   right  end of the run of [a-zA-Z0-9.-_~] characters that follows the
//          '@'; equal to the '@' position when that run is empty
//   left   start of the run of the same characters that ends exactly at
//          the '@', searched from the last whitespace before the '@' or
//          from the start of the buffer when there is no whitespace
// The candidate [left, right) is then validated by domainCheck(), which
// rejects it by making left equal to right.
void HtmlProcessor::replaceEmailChunk(QString &buffer)
{
	QRegExp rExp("[a-zA-Z0-9\\.\\-_~]*");
	QRegExp rExp2("\\s");
	int pos=1;

	while(pos<(int)buffer.length())
	{
		pos=buffer.find('@',pos);
#ifdef DEBUG_HP
		printf("position of @: %d\n",pos);
#endif
		if(pos==-1)
			break;

		//getting the "right" margin
		int len=0;
		int right=rExp.match(buffer,pos+1,&len);
		if(len)
			right+=len;
		else
			right=pos;

		// getting whatever is before: walk forward from the preceding
		// whitespace (index -1 when there is none, i.e. the buffer start)
		// one run at a time until a run ends at the '@'.
		int left=pos;
		int scan=buffer.findRev(rExp2,pos-1);
		while(scan<pos)
		{
			int runLen=0;
			const int runStart=rExp.match(buffer,scan+1,&runLen);
			if(runStart==-1)
				break; // defensive: no run found, keep left at the '@'
			scan=runStart+runLen;
			if(scan==pos)
				left=scan-runLen;
			else if(scan>pos)
				left=right;
		}
#ifdef DEBUG_HP
		printf("chunk is :[%s] %d %d\n", (const char *)buffer.mid(left, right-left), left, right);
#endif
		domainCheck(buffer, left, right, 0, false);
		if(left<right)
		{
#ifdef DEBUG_HP
			printf(" Replacing [%s]\n",(const char *)buffer.mid(left, right-left));
#endif
			const QString address=buffer.mid(left, right-left);
			QString replacer= QString("<a href=\"mailto:") + address + "\">" +
					address + "</a>";
			buffer.replace(left, right-left, replacer);
			// Resume after the inserted element.
			pos=left+replacer.length();
		}
		else
			pos++;
	}
}

/******************************************************************************/

// Escapes plain text for HTML, in place: each of the characters
// < > space " tab & newline | is replaced by the html[] entry that has the
// same index as the character's entry in text[].
//
// A space is only replaced when the character after it is a space too, so
// a single space between words is left untouched.
void HtmlProcessor::replaceHTMLTags(QString &str) 
{
	// Index of " " in text[], as filled in by the constructor.
	const int spaceIdx=1;
	QRegExp r("[<> \"\t\\&\n\\|]");
	int pos=0;
	while(pos<(int)str.length())
	{
		pos=str.find(r,pos);
		if(pos==-1)
			break;

		const int idx=text.findIndex(QString(str[pos]));
#ifdef DEBUG_HP
		printf("pos: %d, idx:%d\n", pos, idx);
#endif
		if(idx==-1)
		{
			// No table entry for this character: leave it alone.
			pos++;
			continue;
		}
		if(idx==spaceIdx)
		{
			// Look at the next character only if there is one.
			const bool spaceFollows=
				pos+1<(int)str.length() &&
				text.findIndex(QString(str[pos+1]))==spaceIdx;
			if(!spaceFollows)
			{
				pos++;
				continue;
			}
		}
		str.replace(pos,1,html[idx]);
		// Skip what was just inserted so it is not escaped again.
		if(html[idx].length())
			pos+=html[idx].length();
		else
			pos++;
	}
}

// Removes every "<!-- ... -->" comment from str, in place.
//
// The end marker is searched from just after the "<!" of the start marker,
// so a stray "-->" that precedes the comment is ignored and "<!-->" counts
// as a complete comment. A comment that is never closed swallows the rest
// of the string.
void HtmlProcessor::eliminateComments (QString &str)
{
#ifdef DEBUG_HP
	printf(IDSTRING"eliminateComments\n");
#endif
	int searchFrom=0;
	for(;;)
	{
		const int ofsb=str.find("<!--",searchFrom);
		if(ofsb==-1)
			break;

		const int ofse=str.find("-->",ofsb+2);
		if(ofse==-1)
		{
			// Unterminated comment: drop everything from its start.
			str.remove(ofsb, str.length()-ofsb);
			break;
		}
		str.remove(ofsb, ofse+3-ofsb);
		// Text before ofsb holds no start marker; resume from there.
		searchFrom=ofsb;
	}
#ifdef DEBUG_HP
	printf(IDSTRING"eliminateComments +ok\n");
#endif
}

// Escapes unbalanced angle brackets in s, in place.
//
// The string is scanned left to right while remembering the last bracket
// seen (initially '>', i.e. "not inside a tag"):
//   - a '<' that follows an unclosed '<' causes the earlier one to be
//     replaced by "&lt;";
//   - a '>' that does not close a '<' is replaced by "&gt;".
//
// NOTE(review): a '<' that is still open when the string ends is left
// untouched -- confirm whether it should be escaped as well.
void HtmlProcessor::correctHTML (QString &s)
{
	// initially, the html code is correct.
	char oldy='>';
	int oldOffset=0; // index of the currently open '<'

	for(int i=0;i<(int)s.length();i++)
	{
		const char c=((const char *)s)[i];
		if(c=='<')
		{
			if(oldy=='<')
			{ // i have <<
				s.replace(oldOffset,1,"&lt;");
				// The string grew by three characters in front of i;
				// step forward so that i is on the current '<' again.
				i+=3;
			}
			oldy='<';
			oldOffset=i;
		}
		else if(c=='>')
		{
			if(oldy=='>')
			{ // i have >>
				s.replace(i,1,"&gt;");
			}
			else
			{ // it's ok
				oldy='>';
			}
		}
	}
}

// Decodes HTML entities in buffer, in place: every "&...;" sequence that
// equals an html[] entry (compared in lower case) is replaced by the
// text[] entry with the same index.
//
// Sequences that are not in the table, and table entries whose text side
// is empty, are left untouched. Scanning stops at the first '&' that has
// no ';' after it.
void HtmlProcessor::removeHTMLTags(QString &buffer)
{
	// Longest single entity filled in by the constructor: "&brvbar;".
	const int maxEntityLen=8;
	int pos=0;
	while((pos=buffer.find("&",pos))>=0)
	{
		const int end=buffer.find(";",pos);
		if(end==-1)
			break; // no terminator left, so no further entity either

		// Length including the terminating ';'.
		const int entityLen=end-pos+1;
		int idx=-1;
		if(entityLen<=maxEntityLen)
			idx=html.findIndex(buffer.mid(pos, entityLen).lower());

		if(idx!=-1 && text[idx].length()>0)
		{
			buffer.replace(pos, entityLen, text[idx]);
			// Step over the decoded text so that e.g. the '&' produced
			// from "&amp;" is not decoded a second time.
			pos+=text[idx].length();
		}
		else
			pos++;
	}
}



// Strips tags from str, in place: repeatedly removes the text from a '<'
// up to and including the first '>' that follows it.
//
// Scanning stops as soon as no '<' is left or a '<' has no '>' after it;
// such an unterminated '<' stays in the string.
void HtmlProcessor::removeTags (QString &str)
{
	int pos=0;
	for(;;)
	{
		pos=str.find("<",pos);
		if(pos==-1)
			break;

		const int otherPos=str.find(">",pos);
		if(otherPos==-1)
			break; // nothing closes this '<': stop instead of retrying

		str.remove(pos,otherPos-pos+1);
		// pos now addresses the text that followed the removed tag.
	}
}

/******************************** OLD ****************************/
// Counts how many consecutive characters of s, starting at index pos and
// moving forward, occur in wildcard (compared case-insensitively).
// Returns that count; 0 when pos is outside s or s[pos] is not in wildcard.
int HtmlProcessor::wildcardFind(QString s, QString wildcard, int pos)
{
#ifdef DEBUG_HP
	printf(IDSTRING"wildcardFind\n");
#endif
	const int total=s.length();
	int len=0;
	// The explicit bounds keep the loop from indexing outside s.
	for(int ofs=pos;
	    ofs>=0 && ofs<total && wildcard.find(s[ofs],0,false)!=-1;
	    ofs++, len++)
	{
#ifdef DEBUG_HP2
		printf(IDSTRING"wildcardFind: <%c>\n",((const char *)s)[ofs]);
#endif
	}
#ifdef DEBUG_HP
	printf(IDSTRING"wildcardFind +ok\n");
#endif
	return len;
}

// Counts how many consecutive characters of s, starting at index pos and
// moving backward, occur in wildcard (compared case-sensitively).
// Returns that count; 0 when pos is outside s or s[pos] is not in wildcard.
//
// NOTE(review): the scan stops before index 0, so the first character of
// s is never counted, and unlike wildcardFind() the comparison is
// case-sensitive -- confirm that both are intended.
int HtmlProcessor::wildcardRfind(QString s, QString wildcard, int pos)
{
#ifdef DEBUG_HP
	printf(IDSTRING"wildcardRfind\n");
#endif
	const int total=s.length();
	int len=0;
	// The explicit bounds keep the loop from indexing outside s.
	for(int ofs=pos;
	    ofs>0 && ofs<total && wildcard.find(s[ofs])!=-1;
	    ofs--, len++)
	{
#ifdef DEBUG_HP2
		printf(IDSTRING"wildcardRind: <%c>\n",((const char *)s)[ofs]);
#endif
	}
#ifdef DEBUG_HP
	printf(IDSTRING"wildcardRfind +ok\n");
#endif
	return len;
}

