/* Strip Club - Online/Offline Comic Reader/Archiver
 *
 * Copyright notice for this file:
 *  Copyright (C) 2004,2005 Benjamin Cutler
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <pcre.h>
#include <stdio.h>
#include <string.h>
#include <FL/Fl_Image.H>
#include <FL/Fl_Box.H>
#include <FL/Fl_Group.H>
#include "interface.h"
#include "load.h"
#include "html.h"
#include "http.h"
#include "url.h"

// Preferences group, declared extern here, that the functions in this file write
// their results into: "Image<N>" / "numimage" from FindImages() and
// "prevlink" / "nextlink" from FindLinks().
extern Fl_Preferences *CurComicCacheGroup;

// Strips at most one leading and one trailing quote character (' or ") from
// src, in place.
//
// src: NUL-terminated, writable string. NULL and the empty string are accepted
//      and left untouched.
inline void crop(char *src) {
	size_t len;

	if (!src) {
		return;
	}

	len = strlen(src);

	if (len && (src[0] == '\'' || src[0] == '\"')) {
		// Shift everything left by one. "len" bytes starting at src + 1 is the
		// remaining text plus its terminating NUL, and nothing beyond it.
		memmove(src, src + 1, len);
		len--;
	}

	// Only look at the last character if there still is one; otherwise
	// src[len - 1] would index in front of the buffer.
	if (len && (src[len - 1] == '\'' || src[len - 1] == '\"')) {
		src[len - 1] = '\0';	// Chop off the end
	}
}

void FindImages(const char *File, const char *Pattern, sURL *Source) {
	FILE *fptr;
	char tmp, buf[400], ImageFileName[400], *URL = NULL;
	int tagpos = 0, numimage = 0, length, lastupdate = 0;
	pcre *compile, *crap;
	pcre_extra *extra;
	const char *error;
	int erroffset;
	bool intag = false;
	int result = 0, ovector[30];

	Out("Opening \"%s\" to find images\n", File);

	if (!File) {
		DbgOut("NULL pointer passed to FindImages()... unable to retrieve HTML?\n");
		return;
	}

	DbgOut("Search Pattern: %s\n", Pattern);

	if (!(fptr = fopen(File, "r"))) {
		MOut("Unable to open %s!\n", File);
		return;
	}

	fseek(fptr, 0, SEEK_END);

	length = ftell(fptr);

	fseek(fptr, 0, SEEK_SET);

	compile = pcre_compile(Pattern, PCRE_CASELESS || PCRE_DOTALL, &error, &erroffset, NULL);

	if (!compile) {
		MOut("Error compiling pattern: %s\nError at offset: %d\nPattern: %s\n", error, erroffset, Pattern);
		fclose(fptr);
		return;
	}

	// Keenspot has the previous and next images as 1x1 images to make archive browsing faster, but we don't want them

	crap = pcre_compile("<img.*?WIDTH=\"1\".*?>", PCRE_CASELESS, &error, &erroffset, NULL);		

	extra = pcre_study(compile, 0, &error);

	Interface.StatusRange(0.0, (float)length);
	Interface.StatusValue(0.0);
	Interface.StatusLabel("Image searching...");

	do {
		do {
			while (1) {
				fread(&tmp, 1, 1, fptr);
				if (feof(fptr)) {
					break;
				}
				if (tmp == '<') {
					intag = true;
					tagpos = 1;
					buf[0] = '<';
				} else {
					if (intag) {
						if (tagpos == 400) {
							intag = false;
							continue;
						}
						if ((tmp != '\n') && (tmp != '\r')) {		// Ignore newlines, they confuse the parser
							buf[tagpos++] = tmp;
						}
						if (tmp == '>') {
							intag = false;
							buf[tagpos] = '\0';
							break;
						}
					}
				}
			}
			if (feof(fptr)) {
				result = -20;
				Out("Done parsing\n");
				break;
			}
			if ((ftell(fptr) - 2048) > lastupdate) {
				Interface.StatusValue((float)ftell(fptr));
				lastupdate = ftell(fptr);
			}
			result = pcre_exec(compile, extra, buf, strlen(buf), 0, 0, ovector, 30);
		} while (result < 0);
		if (result != -20) {
			if (pcre_exec(crap, NULL, buf, strlen(buf), 0, 0, 0, 0) < 0) {		// Filter out the images we don't want
				sURL *NewURL;
				if (result) { 
					pcre_copy_substring(buf, ovector, result, 1, ImageFileName, 399);
					result = 0;
				}
				free(URL);
				URL = NULL;
				if (strncmp(ImageFileName, "http://", 7)) {
					URL = strdup(MakeNewURL(ImageFileName, Source));
				} else {
					URL = strdup(ImageFileName);
				}
				Out("Found Image: %s\n", URL);
				NewURL = URLCreate(URL);
				if (CurComicG->entryExists("imgfilepattern")) {
					pcre *compile;
					char *pattern, ImageFile[40];
					const char *error;
					int erroroffset, result, ovector[10];
					CurComicG->get("imgfilepattern", pattern, "");
					DbgOut("%s\n", pattern);
					compile = pcre_compile(pattern, PCRE_CASELESS | PCRE_DOTALL, &error, &erroroffset, NULL);
					free(pattern);
					if (!compile) {
						MOut("Error compiling Image File Pattern:\n%s\nError at offset %d\n", error, erroroffset);
						continue;
					}
					result = pcre_exec(compile, NULL, URL, strlen(URL), 0, 0, ovector, 10);
					if (result < 0) {
						MOut("Image URL does not match Image File Pattern!\n");
						pcre_free(compile);
						continue;
					}
					pcre_copy_substring(URL, ovector, result, 1, ImageFile, 40);
					DbgOut("ImageFile = %s\n", ImageFile);
					pcre_free(compile);
					CurComicCacheGroup->set(Fl_Preferences::Name("Image%d", numimage++), ImageFile);
				} else {
					CurComicCacheGroup->set(Fl_Preferences::Name("Image%d", numimage++), NewURL->File);
				}
				if (!GrabHTTPLink(URL, URLUnbreak(Source))) {
					EOut("Error downloading %s\n", ImageFileName);		// We can ignore the return unless it fails, in which case warn the user
				}
				URLDelete(NewURL);
			}
		} else {
			break;
		}
	} while (1);

	fclose(fptr);

	Interface.StatusLabel("");
	Interface.StatusValue(0.0);

	pcre_free(compile);
	pcre_free(crap);
	pcre_free(extra);

	free(URL);

	if(!numimage) {
		EOut("No images found! Incorrect URL, or incorrect search pattern?\n");
	}
	CurComicCacheGroup->set("numimage", numimage);
}

void FindLinks(const char *File, sURL *Source) {
	FILE *fptr;
	char *PrevPattern, *NextPattern;
	pcre *PrevCompile, *NextCompile, *ExtractLink;
	pcre_extra *PrevExtra, *NextExtra;
	const char *error;
	char tmp, buf[400], linkbuf[400] = "";
	int erroffset, tagpos = 0, length, lastupdate = 0;
	bool intag = false, FoundPrev = false, FoundNext = false;
	int result = 0, ovector[30];
	int linktype;

	if (!File) {
		DbgOut("NULL pointer passed to FindLinks(), incorrect URL?\n");
		return;
	}

	if (!CurComicG->entryExists("prevpattern") || !CurComicG->entryExists("nextpattern")) {
		Out("One or both link patterns are missing from the prefs file.\nNo link searching possible. Re-importing .comic may fix this.\nYou may also try the defaults if it's a Keenspot/Keenspace comic.\n");
		return;
	}

	{ char *Temp;
		CurComicG->get("linktype", Temp, "tag");
		if (!strcmp("tag", Temp)) {
			linktype = 0;
		} else if (!strcmp("text", Temp)) {
			linktype = 1;
		} else if (!strcmp("map", Temp)) {
			linktype = 2;
		} else {
			MOut("Unknown link type specified: %s\n", Temp);
			free(Temp);
			return;
		}
		free(Temp);
	}

	Out("Opening \"%s\" to find links\n", File);

	if (!(fptr = fopen(File, "r"))) {
		MOut("Error opening \"%s\"!\n", File);
		return;
	}

	fseek(fptr, 0, SEEK_END);

	length = ftell(fptr);

	fseek(fptr, 0, SEEK_SET);

	Interface.StatusRange(0.0, (float)length);
	Interface.StatusValue(0.0);
	Interface.StatusLabel("Link searching...");

	CurComicG->get("prevpattern", PrevPattern, "");
	CurComicG->get("nextpattern", NextPattern, "");

	DbgOut("PrevPattern: %s\n", PrevPattern);
	DbgOut("NextPattern: %s\n", NextPattern);

	PrevCompile = pcre_compile(PrevPattern, PCRE_CASELESS || PCRE_DOTALL, &error, &erroffset, NULL);

	if (!PrevCompile) {
		MOut("Error compiling pattern: %s\nError at offset: %d\nPattern: %s\n", error, erroffset, PrevPattern);
		free(PrevPattern);
		free(NextPattern);
		return;
	}

	NextCompile = pcre_compile(NextPattern, PCRE_CASELESS || PCRE_DOTALL, &error, &erroffset, NULL);

	if (!NextCompile) {
		MOut("Error compiling pattern: %s\nError at offset: %d\nPattern: %s\n", error, erroffset, NextPattern);
		pcre_free(PrevCompile);
		free(PrevPattern);
		free(NextPattern);
		return;
	}

	ExtractLink = pcre_compile("<a .*href=(.*?)( .*>|>)", PCRE_CASELESS || PCRE_DOTALL, &error, &erroffset, NULL);		// That is such an ugly looking regexp

	if (!ExtractLink) {			// Shouldn't ever happen, but I might tweak this at some point, who knows...
		MOut("Error compiling Link pattern: %s\nError at offset: %d\n", error, erroffset);
		pcre_free(NextCompile);
		pcre_free(PrevCompile);
		free(PrevPattern);
		free(NextPattern);
		return;
	}

	PrevExtra = pcre_study(PrevCompile, 0, &error);
	NextExtra = pcre_study(NextCompile, 0, &error);

	if (linktype == 0) {		// Tag based link search
		bool Done = false;
		while(!Done && !(FoundPrev && FoundNext)) {
			while(!Done) {		// Search for html tags
				if(!fread(&tmp, 1, 1, fptr) || feof(fptr)) {
					Done = true;
				}
				if (tmp == '<') {
					intag = true;
				}
				if (intag) {
					if (tmp != '\n' && tmp != '\r') {
						buf[tagpos++] = tmp;
					}
					if (tmp == '>') {
						buf[tagpos] = '\0';
						tagpos = 0;
						intag = false;
						break;
					}
					if (tagpos == 399) {
						tagpos = 0;
						intag = false;
						continue;
					}
				}
			}
			if ((ftell(fptr) - 2048) > lastupdate) {
				Interface.StatusValue((float)ftell(fptr));
				lastupdate = ftell(fptr);
			}
			if (!Done) {
				if (Debug) {
					DbgOut("Checking Tag against regexps:\n%s\n", buf);
				}
				if ((result = pcre_exec(ExtractLink, 0, buf, strlen(buf), 0, 0, ovector, 30)) > 0) {
					pcre_copy_substring(buf, ovector, result, 1, linkbuf, 399);
					crop(linkbuf);
				}
				result = -1;	// Reset result
				if(!FoundPrev) {	// "Prev" Link hasn't been found yet
					result = pcre_exec(PrevCompile, PrevExtra, buf, strlen(buf), 0, 0, ovector, 30);
					if (result >= 0) {
						DbgOut("Found Prev Tag Marker\n");
						DbgOut("Using: %s\n", linkbuf);
						FoundPrev = true;
						if (strlen(linkbuf)) {
							if (strncmp(linkbuf, "http://", 7)) {
								CurComicCacheGroup->set("prevlink", MakeNewURL(linkbuf, Source));
							} else {
								CurComicCacheGroup->set("prevlink", linkbuf);
							}
						}
						strcpy(linkbuf, ""); // Make sure we don't reuse the link
					}
				}
				if((result < 0) && !FoundNext) {	// "Prev" Link didn't match, or was already found, AND "Next" Link hasn't been found yet
					result = pcre_exec(NextCompile, NextExtra, buf, strlen(buf), 0, 0, ovector, 30);
					if (result >= 0) {
						DbgOut("Found Next Tag Marker\n");
						DbgOut("Using: %s\n", linkbuf);
						FoundNext = true;
						if (strlen(linkbuf)) {
							if (strncmp(linkbuf, "http://", 7)) {
								CurComicCacheGroup->set("nextlink", MakeNewURL(linkbuf, Source));
							} else {
								CurComicCacheGroup->set("nextlink", linkbuf);
							}
						}
						strcpy(linkbuf, ""); // Make sure we don't reuse the link
					}
				}
			} // if (!Done)
		} // while (!Done...
	} else if (linktype == 1) {		// Text-based link search
		bool Done = false;
		while (!Done && !(FoundPrev && FoundNext)) {
			while(!Done) {		// Search for html tags
				if(!fread(&tmp, 1, 1, fptr) || feof(fptr)) {
					Done = true;
				}
				if (tmp == '<') {
					intag = true;
				}
				if (intag) {
					buf[tagpos++] = tmp;
					if (tmp == '>') {
						buf[tagpos] = '\0';
						tagpos = 0;
						intag = false;
						break;
					}
					if (tagpos == 399) {
						tagpos = 0;
						intag = false;
						continue;
					}
				}
			} // while (!Done)
			if (!Done) {
				if ((ftell(fptr) - 2048) > lastupdate) {
					Interface.StatusValue((float)ftell(fptr));
					lastupdate = ftell(fptr);
				}
				if ((result = pcre_exec(ExtractLink, 0, buf, strlen(buf), 0, 0, ovector, 30)) > 0) {
					bool endlink = false;
					pcre_copy_substring(buf, ovector, result, 1, linkbuf, 399);
					crop(linkbuf);
					do {
						fread(&tmp, 1, 1, fptr);
						buf[tagpos++] = tmp;
						if (tagpos == 399) {		// Don't overflow
							tagpos = 0;
							buf[0] = 0;
							endlink = true;
						}
						if (tmp == '>') {	// Maybe found the end?
							if (!strncmp((buf + tagpos - 4), "</a>", 4) || !strncmp((buf + tagpos - 4), "</A>", 4)) {	// Found the end
								buf[tagpos - 4] = 0;		// Cut off the end tag, only interested in what's between them
								tagpos = 0;
								endlink = true;
							}
						}						
					} while (!endlink);
					endlink = false;
					result = -1;	// Reset result
					if(Debug) {
						DbgOut("Checking against regexps: %s\n", buf);
					}
					if(!FoundPrev) {	// "Prev" Link hasn't been found yet
						result = pcre_exec(PrevCompile, PrevExtra, buf, strlen(buf), 0, 0, ovector, 30);
						if (result >= 0) {
							DbgOut("Found Prev Tag Marker\n");
							DbgOut("Using: %s\n", linkbuf);
							FoundPrev = true;
							if (strlen(linkbuf)) {
								if (strncmp(linkbuf, "http://", 7)) {
									CurComicCacheGroup->set("prevlink", MakeNewURL(linkbuf, Source));
								} else {
									CurComicCacheGroup->set("prevlink", linkbuf);
								}
							}
							strcpy(linkbuf, ""); // Make sure we don't reuse the link
						}
					}
					if((result < 0) && !FoundNext) {	// "Prev" Link didn't match, or was already found, AND "Next" Link hasn't been found yet
						result = pcre_exec(NextCompile, NextExtra, buf, strlen(buf), 0, 0, ovector, 30);
						if (result >= 0) {
							DbgOut("Found Next Tag Marker\n");
							DbgOut("Using: %s\n", linkbuf);
							FoundNext = true;
							if (strlen(linkbuf)) {
								if (strncmp(linkbuf, "http://", 7)) {
									CurComicCacheGroup->set("nextlink", MakeNewURL(linkbuf, Source));
								} else {
									CurComicCacheGroup->set("nextlink", linkbuf);
								}
							}
							strcpy(linkbuf, ""); // Make sure we don't reuse the link
						}
					}
				}	// if
			}
		} // while (!Done...
	} else if (linktype == 2) {	// Map-based link search
		bool Done = false;
		while (!Done && !(FoundPrev && FoundNext)) {
			while(!Done) {		// Search for html tags
				if(!fread(&tmp, 1, 1, fptr) || feof(fptr)) {
					Done = true;
				}
				if (tmp == '<') {
					intag = true;
				}
				if (intag) {
					buf[tagpos++] = tmp;
					if (tmp == '>') {
						buf[tagpos] = '\0';
						tagpos = 0;
						intag = false;
						break;
					}
					if (tagpos == 399) {
						tagpos = 0;
						intag = false;
						continue;
					}
				}
			} // while (!Done)
			if (!Done) {
				result = 0;
				if ((ftell(fptr) - 2048) > lastupdate) {
					Interface.StatusValue((float)ftell(fptr));
					lastupdate = ftell(fptr);
				}
				if (!FoundPrev && ((result = pcre_exec(PrevCompile, PrevExtra, buf, strlen(buf), 0, 0, ovector, 30)) > 0)) {	// Match tag, and pull out link if applicable
					pcre_copy_substring(buf, ovector, result, 1, linkbuf, 399);
					DbgOut("Found Prev Tag Marker\n");
					DbgOut("Using: %s\n", linkbuf);
					FoundPrev = true;
					if (strlen(linkbuf)) {
						if (strncmp(linkbuf, "http://", 7)) {
							CurComicCacheGroup->set("prevlink", MakeNewURL(linkbuf, Source));
						} else {
							CurComicCacheGroup->set("prevlink", linkbuf);
						}
					}
					strcpy(linkbuf, ""); // Make sure we don't reuse the link
				}
				if (!FoundNext && ((result = pcre_exec(NextCompile, NextExtra, buf, strlen(buf), 0, 0, ovector, 30)) > 0)) {	// Match tag, and pull out link if applicable
					pcre_copy_substring(buf, ovector, result, 1, linkbuf, 399);
					DbgOut("Found Next Tag Marker\n");
					DbgOut("Using: %s\n", linkbuf);
					FoundNext = true;
					if (strlen(linkbuf)) {
						if (strncmp(linkbuf, "http://", 7)) {
							CurComicCacheGroup->set("nextlink", MakeNewURL(linkbuf, Source));
						} else {
							CurComicCacheGroup->set("nextlink", linkbuf);
						}
					}
					strcpy(linkbuf, ""); // Make sure we don't reuse the link
				}
			}
		} // while (!Done...
	}

	Out("Done Link Searching\n");

	pcre_free(PrevCompile);
	pcre_free(NextCompile);
	pcre_free(PrevExtra);
	pcre_free(NextExtra);
	pcre_free(ExtractLink);
	free(PrevPattern);
	free(NextPattern);

	fclose(fptr);

	Interface.StatusLabel("");
	Interface.StatusValue(0.0);

}
