/*
    BFilter - a smart ad-filtering web proxy
    Copyright (C) 2002-2005  Joseph Artsimovich <joseph_a@mail.ru>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include "pch.h"

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "UrlPatternsFile.h"
#include "UrlPatterns.h"
#include "TextPattern.h"
#include "ConfError.h"
#include "ConfErrorHandler.h"
#include "IntrusivePtr.h"
#include "StringUtils.h"
#include "InsensitiveEqual.h"
#include <memory>
#include <istream>
#include <ostream>
#include <cctype>
#include <algorithm>
#include <stddef.h>
#include <cassert>

using namespace std;

// Helper that parses the text of a URL patterns file one line at a time.
// Blank lines, comments and successfully recognized rules are appended to
// the element list; the compiled patterns of recognized rules are registered
// with the UrlPatterns target; problems are reported to the ConfErrorHandler.
// The loader only holds references to these three objects.
class UrlPatternsFile::Loader
{
public:
	// Binds the loader to the element list to fill, the pattern set to
	// populate and the handler that receives errors and warnings.
	Loader(std::list<Element>& elements,
		UrlPatterns& target, ConfErrorHandler& eh);
	
	// Splits text on '\n' and processes each line, numbering lines from 1.
	void load(BString const& text);
private:
	// Handles one raw line: blank, comment ('#' or ';') or "<tag> <pattern>".
	bool processInputLine(BString const& line, int lineno);
	
	// Compiles the pattern and dispatches on the (case-insensitive) tag.
	bool processInputPattern(BString const& tag,
		BString const& pattern, int lineno);
	
	// Handles tags starting with '+' or '-' (hint rules).
	bool processHintPattern(
		BString const& tag, BString const& pattern,
		UrlPatterns::PatternPtr const& tp, int lineno);
	
	std::list<Element>& m_rElements; // receives one Element per accepted line
	UrlPatterns& m_rTarget; // receives the compiled patterns
	ConfErrorHandler& m_rErrorHandler; // receives parse errors and warnings
};


void
UrlPatternsFile::clear()
{
	m_elements.clear();
}

void
UrlPatternsFile::load(
	std::string const& text, UrlPatterns& target, ConfErrorHandler& eh)
{
	clear();
	Loader loader(m_elements, target, eh);
	loader.load(BString(text));
}

// Exchanges the element lists of this object and other.
void
UrlPatternsFile::swap(UrlPatternsFile& other)
{
	other.m_elements.swap(m_elements);
}


/*==================== UrlPatternsFile::Loader =========================*/

// Stores references to the element list, the pattern set and the error
// handler; nothing is parsed until load() is called.
UrlPatternsFile::Loader::Loader(
	std::list<Element>& elements, UrlPatterns& target, ConfErrorHandler& eh)
	: m_rElements(elements)
	, m_rTarget(target)
	, m_rErrorHandler(eh)
{
	// Nothing else to set up.
}

// Walks through text, handing each '\n'-separated line to
// processInputLine() together with its 1-based line number.
// The result of processInputLine() is not inspected here.
void
UrlPatternsFile::Loader::load(BString const& text)
{
	char const* const text_end = text.end();
	char const* line_begin = text.begin();
	
	for (int lineno = 1; line_begin != text_end; ++lineno) {
		char const* const line_end = StringUtils::find(line_begin, text_end, '\n');
		processInputLine(BString(text, line_begin, line_end), lineno);
		if (line_end == text_end) {
			// No newline found: this was the last line.
			return;
		}
		line_begin = line_end + 1; // continue right after the '\n'
	}
}

// Processes a single line of the patterns file.
//
// After trimming both ends with StringUtils::ltrim / rtrim:
//  - an empty line is recorded as a default-constructed Element;
//  - a line starting with '#' or ';' is recorded as a comment Element
//    holding the text that follows the marker;
//  - anything else is split at the first run of whitespace into a tag and
//    a pattern and passed on to processInputPattern().
//
// Returns true for blank and comment lines, otherwise the result of
// processInputPattern().
bool
UrlPatternsFile::Loader::processInputLine(
	BString const& line, int lineno)
{
	char const* begin = line.begin();
	char const* end = line.end();
	begin = StringUtils::ltrim(begin, end);
	end = StringUtils::rtrim(begin, end);
	if (begin == end) {
		m_rElements.push_back(Element());
		return true;
	}
	if (*begin == '#' || *begin == ';') {
		++begin; // skip the comment marker itself
		BString comment(line, begin, end);
		m_rElements.push_back(Element(comment));
		return true;
	}
	
	// Note: the <cctype> classification functions require a value that is
	// representable as unsigned char (or EOF). A plain char holding a byte
	// >= 0x80 may be negative, so it is converted before the call.
	char const* space_pos = begin;
	for (; space_pos != end
			&& !isspace(static_cast<unsigned char>(*space_pos)); ++space_pos) {
		// skip non-spaces
	}
	BString tag(line, begin, space_pos);
	
	char const* space_end = space_pos;
	for (; space_end != end
			&& isspace(static_cast<unsigned char>(*space_end)); ++space_end) {
		// skip spaces
	}
	BString pattern(line, space_end, end);
	
	return processInputPattern(tag, pattern, lineno);
}

// Handles a "<tag> <pattern>" rule.
//
// The pattern is compiled into a TextPattern first, then the tag is matched
// case-insensitively against the known keywords. A recognized keyword
// registers the compiled pattern with the target and records an Element.
// Tags starting with '+' or '-' are delegated to processHintPattern().
//
// Returns true when the rule was accepted. A missing tag or pattern, an
// unknown tag, or a regex that fails to compile is reported to the error
// handler, and the handler's result is returned.
bool
UrlPatternsFile::Loader::processInputPattern(
	BString const& tag, BString const& pattern, int lineno)
{
	bool const incomplete = tag.empty() || pattern.empty();
	if (incomplete) {
		ConfError err(ConfError::T_ERROR, "Parse error", lineno);
		return m_rErrorHandler.handleError(err);
	}
	
	InsensitiveEqual ieq;
	
	try {
		namespace rc = boost::regex_constants;
		UrlPatterns::PatternPtr tp(new TextPattern(
			pattern.toStdString(),
			rc::normal|rc::icase|rc::nosubs|rc::optimize
		));
		
		if (ieq(tag, BString("forbid"))) {
			m_rTarget.addSubstitutionPattern(tp, UrlPatterns::FORBID_HTML);
			m_rElements.push_back(Element(Element::TAG_FORBID, tag, pattern));
			return true;
		}
		if (ieq(tag, BString("allow"))) {
			m_rTarget.addAllowPattern(tp);
			m_rElements.push_back(Element(Element::TAG_ALLOW, tag, pattern));
			return true;
		}
		if (ieq(tag, BString("nofilter"))) {
			m_rTarget.addNoFilterPattern(tp);
			m_rElements.push_back(Element(Element::TAG_NOFILTER, tag, pattern));
			return true;
		}
		if (ieq(tag, BString("html"))) {
			m_rTarget.addSubstitutionPattern(tp, UrlPatterns::EMPTY_HTML);
			m_rElements.push_back(Element(Element::TAG_SUBST_HTML, tag, pattern));
			return true;
		}
		if (ieq(tag, BString("image"))) {
			m_rTarget.addSubstitutionPattern(tp, UrlPatterns::EMPTY_IMAGE);
			m_rElements.push_back(Element(Element::TAG_SUBST_IMAGE, tag, pattern));
			return true;
		}
		if (ieq(tag, BString("flash"))) {
			m_rTarget.addSubstitutionPattern(tp, UrlPatterns::EMPTY_FLASH);
			m_rElements.push_back(Element(Element::TAG_SUBST_FLASH, tag, pattern));
			return true;
		}
		if (ieq(tag, BString("js"))) {
			m_rTarget.addSubstitutionPattern(tp, UrlPatterns::EMPTY_JS);
			m_rElements.push_back(Element(Element::TAG_SUBST_JS, tag, pattern));
			return true;
		}
		if (ieq(tag, BString("ad"))) {
			m_rTarget.addSubstitutionPattern(tp, UrlPatterns::AUTO_SUBST);
			m_rElements.push_back(Element(Element::TAG_SUBST_AUTO, tag, pattern));
			return true;
		}
		if (tag[0] == '+' || tag[0] == '-') {
			// Hint rule: the tag encodes a signed weight.
			return processHintPattern(tag, pattern, tp, lineno);
		}
		
		// None of the known keywords matched.
		ConfError err(
			ConfError::T_WARNING,
			"Unknown tag: "+tag.toStdString(), lineno
		);
		return m_rErrorHandler.handleError(err);
	} catch (boost::bad_expression& e) {
		ConfError err(
			ConfError::T_ERROR,
			string("Bad regex: ")+e.what(), lineno
		);
		return m_rErrorHandler.handleError(err);
	}
}

// Handles a hint rule, i.e. one whose tag starts with '+' or '-'.
//
// Accepted tag forms (shown for '+'; '-' yields the negated value):
//   "+"     -> hint  1
//   "+N"    -> hint  N, where N is a decimal number covering the rest of
//              the tag
//   "+++"   -> hint  equal to the number of sign characters; every
//              character of the tag must be the same sign
// Any other form is reported to the error handler as "Bad hint" and the
// handler's result is returned. On success the compiled pattern tp is
// registered with the target together with the hint, an Element is
// recorded, and true is returned.
//
// Precondition: tag is not empty (asserted).
bool
UrlPatternsFile::Loader::processHintPattern(
	BString const& tag, BString const& pattern,
	UrlPatterns::PatternPtr const& tp, int lineno)
{
	assert(!tag.empty());
	
	int const sign = (tag[0] == '+' ? 1 : -1);
	int hint = 0;
	bool error = false;
	
	if (tag.size() == 1) {
		hint = sign;
	} else if (isdigit(static_cast<unsigned char>(tag[1]))) {
		// The cast avoids passing a negative value to isdigit(), which
		// is undefined for bytes >= 0x80 when char is signed.
		//
		// NOTE(review): parseUnsigned presumably moves 'end' back to where
		// parsing stopped; anything left over makes the tag invalid.
		char const* end = tag.end();
		hint = sign * StringUtils::parseUnsigned<int>(tag.begin()+1, end);
		if (end != tag.end()) {
			error = true;
		}
	} else {
		size_t const same_as_first = static_cast<size_t>(
			std::count(tag.begin(), tag.end(), tag[0])
		);
		if (same_as_first == tag.size()) {
			// Convert the size to int before multiplying so that a negative
			// sign does not wrap around in unsigned arithmetic.
			hint = sign * static_cast<int>(tag.size());
		} else {
			error = true;
		}
	}

	if (error) {
		ConfError err(ConfError::T_ERROR, "Bad hint", lineno);
		return m_rErrorHandler.handleError(err);
	}
	
	m_rTarget.addHintPattern(tp, hint);
	m_rElements.push_back(Element(
		tag[0] == '+'
		? Element::TAG_POSITIVE_HINT
		: Element::TAG_NEGATIVE_HINT,
		tag, pattern
	));
	
	return true;
}
