/*
    Copyright (C) 2003-2006 Teus Benschop.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

*/


#include "libraries.h"
#include "utilities.h"
#include "constants.h"


// Parser state shared between the GMarkup callbacks and main().

// Value of the first attribute of the most recent BOOK_TAG element.
ustring book;
// First attribute of the most recent CHAPTER_TAG element, converted to int.
int chapter;
// Value of the first attribute of the most recent VERSE_TAG element.
ustring verse;
// Every distinct character seen in text nodes, in order of first appearance.
vector<ustring> characters;
// Parallel to `characters`: charcount[i] is the number of times
// characters[i] has been seen.
vector<unsigned int> charcount;


/*
  GMarkup callback invoked for every opening tag.

  Records the first attribute value of book, chapter and verse elements in
  the globals `book`, `chapter` and `verse`. Other elements are ignored.

  element_name      Name of the element that is being opened.
  attribute_values  NULL-terminated array of attribute values; only the
                    first entry is used.
  The remaining parameters are unused.
*/
void start_element_handler (GMarkupParseContext *context,
                            const gchar         *element_name,
                            const gchar        **attribute_names,
                            const gchar        **attribute_values,
                            gpointer             user_data,
                            GError             **error)
{
  // The attribute arrays are NULL-terminated, so an element without any
  // attribute has a NULL first entry. Building a string from NULL is
  // undefined, hence such an element leaves the current state untouched.
  if (attribute_values == NULL || attribute_values[0] == NULL) {
    return;
  }
  const gchar *first_value = attribute_values[0];
  string element = element_name;
  if (element == BOOK_TAG) {
    // A book starts. Get the name of the book.
    book = first_value;
  }
  else if (element == CHAPTER_TAG) {
    // A chapter starts. Get its number.
    chapter = convert_to_int (first_value);
  }
  else if (element == VERSE_TAG) {
    // A verse starts. Store it.
    verse = first_value;
  }
}


/*
  GMarkup callback invoked for every closing tag.

  The end of a book, chapter or verse is recognised, but no action is
  attached to any of them, so this handler changes no state.
*/
void end_element_handler (GMarkupParseContext *context,
                          const gchar         *element_name,
                          gpointer             user_data,
                          GError             **error)
{
  const string closing_tag (element_name);
  // End of a book: nothing to do.
  if (closing_tag == BOOK_TAG)
    return;
  // End of a chapter: nothing to do.
  if (closing_tag == CHAPTER_TAG)
    return;
  // End of a verse: nothing to do.
  if (closing_tag == VERSE_TAG)
    return;
}


/*
  GMarkup callback invoked for the text between tags.

  Tallies every character of the text: a character that was seen before gets
  its entry in `charcount` incremented, a new character is appended to
  `characters` with a count of 1.

  text      UTF-8 text. It is not guaranteed to be nul-terminated.
  text_len  Length of `text` in bytes.
  The remaining parameters are unused.
*/
void text_handler (GMarkupParseContext *context,
                   const gchar         *text,
                   gsize                text_len,
                   gpointer             user_data,
                   GError             **error)
{
  // Copy exactly text_len bytes instead of relying on a terminating nul.
  const string raw_text (text, text_len);
  const ustring utext (raw_text);
  // Walk the text once with an iterator. Indexing a UTF-8 string by
  // character position has to rescan it from the start for every access.
  for (ustring::const_iterator it = utext.begin (); it != utext.end (); ++it) {
    const ustring character (1, *it);
    bool character_found = false;
    for (unsigned int i2 = 0; i2 < characters.size (); i2++) {
      if (character == characters[i2]) {
        character_found = true;
        charcount[i2]++;
        break;
      }
    }
    if (!character_found) {
      characters.push_back (character);
      charcount.push_back (1);
    }
  }
}



/*
  GMarkup passthrough callback.
  Deliberately empty: whatever is handed to it is ignored and does not
  contribute to the character counts.
*/
void passthrough_handler    (GMarkupParseContext *context,
                             const gchar         *passthrough_text,
                             gsize                text_len,
                             gpointer             user_data,
                             GError             **error)
{
}


/*
  GMarkup error callback.
  Writes the message of the reported error to stderr, followed by a newline.
*/
void error_handler          (GMarkupParseContext *context,
                             GError              *error,
                             gpointer             user_data)
{
  const gchar *message = error->message;
  cerr << message << endl;
}


int main (int argc, char *argv[])
{
  // Information provided when no arguments are given.
  if (argc == 1) {
    cout << "sc-count-characters reads checking units from stdin," << endl;
    cout << "counts all characters, and outputs its report on stdout." << endl;
    cout << "Commandline arguments:" << endl;
    cout << "--no-sort" << endl;
    cout << "--sort-on-character" << endl;
    cout << "--sort-on-count" << endl;
    cout << "--invisible-characters" << endl;
    cout << "  Changes some invisible characters to <newline>, etc." << endl;
    // Do not read stdin in this case because it would hang.
    return 0;
  }
  // Process command line arguments.
  bool sort_on_character = false;
  bool sort_on_count = false;
  bool write_out_invisibles = false;
  for (int i = 1; i < argc; i++) {
    ustring argument;
    argument = argv[i];
    if (argument.length() > 2) {
      if (argument.substr (0, 2) == "--") {
        argument.erase (0, 2);
        if (argument == "sort-on-character") {
          sort_on_character = true;
        }
        if (argument == "sort-on-count") {
          sort_on_count = true;
        }
        if (argument == "invisible-characters") {
          write_out_invisibles = true;
        }
      }
    }
  }
  // Read data from stdin.
  GIOChannel* io;
  gchar* text;
  gsize length;
  io = g_io_channel_unix_new (0);
  g_io_channel_read_to_end (io, &text, &length, NULL);
  // Set up parser.
  GMarkupParseContext *context;
  GMarkupParser parser = {
    start_element_handler,
    end_element_handler,
    text_handler,
    passthrough_handler,
    error_handler
  };
  // Parse xml data.
  context = g_markup_parse_context_new (&parser, GMarkupParseFlags (0), NULL, NULL);
  g_markup_parse_context_parse (context, text, length, NULL);
  g_markup_parse_context_end_parse (context, NULL);
  // Free some resources.  
  g_markup_parse_context_free (context);
  g_free (text);
  g_io_channel_unref (io);
  // Sort on character, if need be.
  if (sort_on_character) {
    quick_sort (characters, charcount, 0, charcount.size());
  }
  // Sort on count, if need be.
  if (sort_on_count) {
    quick_sort (charcount, characters, 0, charcount.size());
  }
  // Output the data.
  for (unsigned int i = 0; i < characters.size(); i++) {
    cout << xml_tag (0, MESSAGE_TAG, false) << endl;
    ustring xml;
    ustring character = characters[i];
    // Deal with invisible characters.
    if (write_out_invisibles) {
      if (characters[i] == "\n")
        character = "newline";
      if (characters[i] == " ")
        character = "space";
    }
    xml = xml_text_embed_in_tags (1, CHARACTER_TAG, character);
    write (1, xml.c_str(), strlen (xml.c_str()));
    write (1, "\n", 1);
    gunichar unichar;
    {
      gunichar * uc;
      uc = g_utf8_to_ucs4_fast (characters[i].c_str(), -1, NULL);
      unichar = * uc;
      g_free (uc);
    }
    cout << xml_tag (1, DECIMAL_ENTITITY_REFERENCE_TAG, false);
    printf ("%d", unichar);
    cout << xml_tag (0, DECIMAL_ENTITITY_REFERENCE_TAG, true) << endl;
    cout << xml_tag (1, HEXADECIMAL_ENTITITY_REFERENCE_TAG, false);
    printf ("U+%04X", unichar);
    cout << xml_tag (0, HEXADECIMAL_ENTITITY_REFERENCE_TAG, true) << endl;
    cout << xml_text_embed_in_tags (1, COUNT_TAG, convert_to_string (charcount[i])) << endl;
    cout << xml_tag (0, MESSAGE_TAG, true) << endl;
  }
  // Ready.
  return 0;
}


/*
Notes.

Paratext can find invalid characters, but as we have Unicode 
where all characters are valid, there is no need for this check.

Diacritics usage in Paratext. It shows all character sequences that have 
diacritics, how many times each sequence occurs, and the reference where it 
occurs. We don't use it as we have Unicode, and that views diacritics as any 
other character.
*/
