/*
 * Main part of code, written by:
 *
 * Copyright (C) 1999-2001  Håvard Kvålen <havardk@xmms.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 *
 */

#include <config.h>
#include <stdlib.h>
#include <glib.h>
#include <string.h>
#include <errno.h>
#include <glib/gi18n-lib.h>

#ifdef HAVE_LANGINFO_CODESET
#include <langinfo.h>
#endif

#include "charset.h"
#include "setting.h"



/****************
 * Declarations *
 ****************/

/* Number of entries in charset_trans_array (total size divided by element size). */
#define CHARSET_TRANS_ARRAY_LEN ( sizeof(charset_trans_array) / sizeof((charset_trans_array)[0]) )
/*
 * Table of the character sets offered to the user.
 * Each entry pairs a title (first field, 'charset_title'), marked for
 * translation with N_() and translated with _() where it is displayed,
 * with the corresponding charset identifier (second field, 'charset_name').
 */
const CharsetInfo charset_trans_array[] = { 
    {N_("Arabic (IBM-864)"),                  "IBM864"        },
    {N_("Arabic (ISO-8859-6)"),               "ISO-8859-6"    },
    {N_("Arabic (Windows-1256)"),             "windows-1256"  },
    {N_("Baltic (ISO-8859-13)"),              "ISO-8859-13"   },
    {N_("Baltic (ISO-8859-4)"),               "ISO-8859-4"    },
    {N_("Baltic (Windows-1257)"),             "windows-1257"  },
    {N_("Celtic (ISO-8859-14)"),              "ISO-8859-14"   },
    {N_("Central European (IBM-852)"),        "IBM852"        },
    {N_("Central European (ISO-8859-2)"),     "ISO-8859-2"    },
    {N_("Central European (Windows-1250)"),   "windows-1250"  },
    {N_("Chinese Simplified (GB18030)"),      "gb18030"       },
    {N_("Chinese Simplified (GB2312)"),       "GB2312"        },
    {N_("Chinese Traditional (Big5)"),        "Big5"          },
    {N_("Chinese Traditional (Big5-HKSCS)"),  "Big5-HKSCS"    },
    {N_("Cyrillic (IBM-855)"),                "IBM855"        },
    {N_("Cyrillic (ISO-8859-5)"),             "ISO-8859-5"    },
    {N_("Cyrillic (ISO-IR-111)"),             "ISO-IR-111"    },
    {N_("Cyrillic (KOI8-R)"),                 "KOI8-R"        },
    {N_("Cyrillic (Windows-1251)"),           "windows-1251"  },
    {N_("Cyrillic/Russian (CP-866)"),         "IBM866"        },
    {N_("Cyrillic/Ukrainian (KOI8-U)"),       "KOI8-U"        },
    {N_("English (US-ASCII)"),                "us-ascii"      },
    {N_("Greek (ISO-8859-7)"),                "ISO-8859-7"    },
    {N_("Greek (Windows-1253)"),              "windows-1253"  },
    {N_("Hebrew (IBM-862)"),                  "IBM862"        },
    {N_("Hebrew (Windows-1255)"),             "windows-1255"  },
    {N_("Japanese (EUC-JP)"),                 "EUC-JP"        },
    {N_("Japanese (ISO-2022-JP)"),            "ISO-2022-JP"   },
    {N_("Japanese (Shift_JIS)"),              "Shift_JIS"     },
    {N_("Korean (EUC-KR)"),                   "EUC-KR"        },
    {N_("Nordic (ISO-8859-10)"),              "ISO-8859-10"   },
    {N_("South European (ISO-8859-3)"),       "ISO-8859-3"    },
    {N_("Thai (TIS-620)"),                    "TIS-620"       },
    {N_("Turkish (IBM-857)"),                 "IBM857"        },
    {N_("Turkish (ISO-8859-9)"),              "ISO-8859-9"    },
    {N_("Turkish (Windows-1254)"),            "windows-1254"  },
    {N_("Unicode (UTF-7)"),                   "UTF-7"         },
    {N_("Unicode (UTF-8)"),                   "UTF-8"         },
    {N_("Unicode (UTF-16BE)"),                "UTF-16BE"      },
    {N_("Unicode (UTF-16LE)"),                "UTF-16LE"      },
    {N_("Unicode (UTF-32BE)"),                "UTF-32BE"      },
    {N_("Unicode (UTF-32LE)"),                "UTF-32LE"      },
    {N_("Vietnamese (VISCII)"),               "VISCII"        },
    {N_("Vietnamese (Windows-1258)"),         "windows-1258"  },
    {N_("Visual Hebrew (ISO-8859-8)"),        "ISO-8859-8"    },
    {N_("Western (IBM-850)"),                 "IBM850"        },
    {N_("Western (ISO-8859-1)"),              "ISO-8859-1"    },
    {N_("Western (ISO-8859-15)"),             "ISO-8859-15"   },
    {N_("Western (Windows-1252)"),            "windows-1252"  }

    /*
     * The character sets listed below are not supported by iconv,
     * which is why they are kept disabled (commented out).
     */
/*    {N_("Arabic (IBM-864-I)"),                "IBM864i"              },
    {N_("Arabic (ISO-8859-6-E)"),             "ISO-8859-6-E"         },
    {N_("Arabic (ISO-8859-6-I)"),             "ISO-8859-6-I"         },
    {N_("Arabic (MacArabic)"),                "x-mac-arabic"         },
    {N_("Armenian (ARMSCII-8)"),              "armscii-8"            },
    {N_("Central European (MacCE)"),          "x-mac-ce"             },
    {N_("Chinese Simplified (GBK)"),          "x-gbk"                },
    {N_("Chinese Simplified (HZ)"),           "HZ-GB-2312"           },
    {N_("Chinese Traditional (EUC-TW)"),      "x-euc-tw"             },
    {N_("Croatian (MacCroatian)"),            "x-mac-croatian"       },
    {N_("Cyrillic (MacCyrillic)"),            "x-mac-cyrillic"       },
    {N_("Cyrillic/Ukrainian (MacUkrainian)"), "x-mac-ukrainian"      },
    {N_("Farsi (MacFarsi)"),                  "x-mac-farsi"},
    {N_("Greek (MacGreek)"),                  "x-mac-greek"          },
    {N_("Gujarati (MacGujarati)"),            "x-mac-gujarati"       },
    {N_("Gurmukhi (MacGurmukhi)"),            "x-mac-gurmukhi"       },
    {N_("Hebrew (ISO-8859-8-E)"),             "ISO-8859-8-E"         },
    {N_("Hebrew (ISO-8859-8-I)"),             "ISO-8859-8-I"         },
    {N_("Hebrew (MacHebrew)"),                "x-mac-hebrew"         },
    {N_("Hindi (MacDevanagari)"),             "x-mac-devanagari"     },
    {N_("Icelandic (MacIcelandic)"),          "x-mac-icelandic"      },
    {N_("Korean (JOHAB)"),                    "x-johab"              },
    {N_("Korean (UHC)"),                      "x-windows-949"        },
    {N_("Romanian (MacRomanian)"),            "x-mac-romanian"       },
    {N_("Turkish (MacTurkish)"),              "x-mac-turkish"        },
    {N_("User Defined"),                      "x-user-defined"       },
    {N_("Vietnamese (TCVN)"),                 "x-viet-tcvn5712"      },
    {N_("Vietnamese (VPS)"),                  "x-viet-vps"           },
    {N_("Western (MacRoman)"),                "x-mac-roman"          },
    // charsets without possibly translatable names
    {"T61.8bit",                              "T61.8bit"             },
    {"x-imap4-modified-utf7",                 "x-imap4-modified-utf7"},
    {"x-u-escaped",                           "x-u-escaped"          },
    {"windows-936",                           "windows-936"          }
*/
};




/*************
 * Functions *
 *************/


gchar *convert_string (const gchar *string, const gchar *from, const gchar *to)
{
    gchar *output;
    GError *error = NULL;

    if (!string)
        return NULL;

    output = g_convert(string, -1, to, from, NULL, NULL, &error);

    //g_message("converting %s from %s to %s", string, from, to);

    if (output == NULL)
    {
        gchar *escaped_str = g_strescape(string, NULL);
        g_warning("convert_string(): Failed conversion from charset '%s' to '%s'. "
                  "String '%s'. Errcode %d (%s).\n",
                  from, to, escaped_str, error->code, error->message);
        g_free(escaped_str);
        g_error_free(error);
        return g_strdup(string);
    }

    return output;
}


/*
 * Conversion with UTF-8 for Ogg Vorbis and FLAC tags (current_charset <===> UTF-8)
 */
/*
 * Convert 'string' from the locale charset to UTF-8 with g_locale_to_utf8().
 *
 * Returns a newly allocated string (release it with g_free()), or NULL when
 * 'string' is NULL. On conversion failure, warnings describing the problem
 * are logged and an unconverted copy of 'string' is returned.
 */
gchar *convert_to_utf8 (const gchar *string)
{
    GError *err = NULL;
    gchar *utf8;
    gchar *escaped;
    const gchar *locale_charset;

    if (string == NULL)
        return NULL;

    utf8 = g_locale_to_utf8(string, -1, NULL, NULL, &err);
    if (utf8 != NULL)
        return utf8;

    /* Conversion failed: log the details, then fall back to a plain copy. */
    escaped = g_strescape(string, NULL);
    g_get_charset(&locale_charset);
    g_warning("convert_to_utf8(): Failed conversion from charset '%s'. "
              "String '%s'. Errcode %d (%s).\n",
              locale_charset, escaped, err->code, err->message);
    g_free(escaped);

    /* Tell the reader of the log whether the input was already UTF-8. */
    if (!g_utf8_validate(string, -1, NULL))
        g_warning("convert_to_utf8(): String was INVALID UTF8.\n");
    else
        g_warning("convert_to_utf8(): String was valid UTF8.\n");

    g_error_free(err);
    return g_strdup(string);
}

gchar *convert_from_utf8 (const char *string)
{
    gchar *output;
    GError *error = NULL;

    if (!string)
        return NULL;

    output = g_locale_from_utf8(string, -1, NULL, NULL, &error);

    if (output == NULL)
    {
        const gchar *usercharset;
        gchar *escaped_str = g_strescape(string, NULL);
        g_get_charset(&usercharset);
        g_warning("convert_from_utf8(): Failed conversion to charset '%s'. "
                  "String '%s'. Errcode %d (%s).\n",
                  usercharset, escaped_str, error->code, error->message);
        g_free(escaped_str);

        if (g_utf8_validate(string, -1, NULL))
            g_warning("convert_from_utf8(): String was valid UTF8.\n");
        else
            g_warning("convert_from_utf8(): String was INVALID UTF8.\n");

        g_error_free(error);
        return g_strdup(string);
    }

    return output;
}



/*
 * Conversion with ISO-8859-1 for ID3v2.3 tags (current_charset <===> ISO-8859-1)
 */
/*
 * Convert 'string' from the locale charset to ISO-8859-1.
 * When the locale charset is "ANSI_X3.4-1968" the string is only duplicated.
 * The result is newly allocated (or NULL for a NULL 'string').
 */
char *convert_to_iso88591 (const char *string)
{
    const gchar *locale_charset = NULL;

    g_get_charset(&locale_charset);

    if (strcmp(locale_charset, "ANSI_X3.4-1968") != 0)
        return convert_string(string, locale_charset, "ISO-8859-1");

    /* No conversion needed: hand back a plain copy */
    return g_strdup(string);
}

char *convert_from_iso88591 (const char *string)
{
    const gchar *charset;
    g_get_charset(&charset);

    /* No conversion needed */
    if (strcmp(charset, "ANSI_X3.4-1968") == 0)
        return g_strdup(string);

    return convert_string(string, "ISO-8859-1", charset);
}



/*
 * Conversion with "this_charset" for ID3v2.3 tags (current_charset <===> this_charset)
 */
// Convert from the locale charset to 'this_charset'
char *convert_to_this_charset (const char *string, char *this_charset)
{
    const gchar *charset;
    g_get_charset(&charset);

    return convert_string(string, charset, this_charset);
}

// Convert from 'this_charset' to the locale charset
char *convert_from_this_charset (const char *string, char *this_charset)
{
    const gchar *charset;
    g_get_charset(&charset);

    return convert_string(string, this_charset, charset);
}



/*
 * Conversion functions using default parameters set by user in the preference window. (USER_CHARACTER_SET <===> FILE_CHARACTER_SET)
 */
/* Convert 'string' from USER_CHARACTER_SET to FILE_CHARACTER_SET via convert_string(). */
char *convert_from_user_to_file (const char *string)
{
    const char *target_charset = FILE_CHARACTER_SET;
    const char *source_charset = USER_CHARACTER_SET;

    return convert_string(string, source_charset, target_charset);
}

/* Convert 'string' from FILE_CHARACTER_SET to USER_CHARACTER_SET via convert_string(). */
char *convert_from_file_to_user (const char *string)
{
    const char *source_charset = FILE_CHARACTER_SET;
    const char *target_charset = USER_CHARACTER_SET;

    return convert_string(string, source_charset, target_charset);
}


/*
 * Functions to translate filename to/from UTF-8
 * Based around the ideas under "File Name Encodings" at
 *    http://developer.gnome.org/doc/API/2.0/glib/glib-Character-Set-Conversion.html
 */
gchar *filename_to_display (const gchar *string)
{
    GError *error = NULL;
    gchar *temp = g_filename_to_utf8(string, -1, NULL, NULL, &error);
    if (!temp)
    {
        // Conversion KO!
        gchar *escaped_str = g_strescape(string, NULL);
        g_warning(_("The filename '%s' couldn't be converted to UTF-8. "
                    "(Try setting the environment variable G_FILENAME_ENCODING): %s\n"),
                    escaped_str, error->message ? error->message : _("Invalid UTF-8"));
        //g_free(escaped_str);
        g_clear_error(&error);

        //return g_strdup(string);
        return g_strdup(escaped_str); // Don't free escaped_str if used!
    }else
    {
        // Conversion OK
        return temp;
    }
}

/*
 * Convert a UTF-8 string to the filename encoding with g_filename_from_utf8().
 *
 * Returns a newly allocated string, or NULL when the conversion failed (a
 * message is printed in that case). Callers have to check for NULL.
 */
gchar *filename_from_display (const gchar* string)
{
    gchar *escaped;
    gchar *fs_name = g_filename_from_utf8(string, -1, NULL, NULL, NULL);

    if (fs_name != NULL)
        return fs_name;

    // Conversion KO: report which string could not be converted
    escaped = g_strescape(string, NULL);
    g_print("WARNING: Could not convert string %s into filename encoding\n", escaped);
    g_free(escaped);

    return NULL;
}


/*
 * Append the translated title of every entry of charset_trans_array to 'combo'.
 * If 'select_charset' is not NULL, the entry whose charset_name is equal to it
 * (exact, case-sensitive comparison) is made the active item.
 */
void Charset_Populate_Combobox (GtkComboBox *combo, gchar *select_charset)
{
    guint idx = 0;

    while (idx < CHARSET_TRANS_ARRAY_LEN)
    {
        const CharsetInfo *entry = &charset_trans_array[idx];

        gtk_combo_box_append_text(combo, _(entry->charset_title));

        if (select_charset != NULL
        &&  strcmp(entry->charset_name, select_charset) == 0)
        {
            gtk_combo_box_set_active(combo, idx);
        }

        idx++;
    }
}


/*
 * Return charset_name from charset_title
 */
gchar *Charset_Get_Name_From_Title (const gchar *charset_title)
{
    guint i;

    if (charset_title)
        for (i=0; i<CHARSET_TRANS_ARRAY_LEN; i++)
            if ( strcasecmp(_(charset_title),_(charset_trans_array[i].charset_title)) == 0 )
                return charset_trans_array[i].charset_name;
    return "";
}


/*
 * Look up the (translated) charset_title that belongs to 'charset_name'.
 * Names are compared case-insensitively. Returns the title passed through
 * _(), or an empty string when 'charset_name' is NULL or not found. The
 * result is not allocated by this function and must not be freed.
 */
gchar *Charset_Get_Title_From_Name (gchar *charset_name)
{
    guint idx;

    if (charset_name == NULL)
        return "";

    for (idx = 0; idx < CHARSET_TRANS_ARRAY_LEN; idx++)
    {
        const CharsetInfo *entry = &charset_trans_array[idx];

        if (strcasecmp(charset_name, entry->charset_name) != 0)
            continue;

        return _(entry->charset_title);
    }

    return "";
}



/*
 * Test if the conversion is supported between two character sets ('from' and 'to)
 */

gboolean test_conversion_charset (const gchar *from, const gchar *to)
{
    gchar *temp;
    GError *error = NULL;

    if (!from || !to)
        return FALSE;
    
    // Do a quick test conversion and examine error output
    temp = g_convert("a", -1, to, from, NULL, NULL, &error);

    if (!temp)
    {
        // Error in conversion
        if (error && error->code == G_CONVERT_ERROR_NO_CONVERSION)
        {
            g_print("Conversion error from '%s' to '%s' (G_CONVERT_ERROR_NO_CONVERSION)\n",from,to);
        } else if (error && error->code == G_CONVERT_ERROR_ILLEGAL_SEQUENCE)
        {
            g_print("Conversion error from '%s' to '%s' (G_CONVERT_ERROR_ILLEGAL_SEQUENCE)\n",from,to);
        } else if (error && error->code == G_CONVERT_ERROR_FAILED)
        {
            g_print("Conversion error from '%s' to '%s' (G_CONVERT_ERROR_FAILED)\n",from,to);
        } else if (error && error->code == G_CONVERT_ERROR_PARTIAL_INPUT)
        {
            g_print("Conversion error from '%s' to '%s' (G_CONVERT_ERROR_PARTIAL_INPUT)\n",from,to);
        } else if (error && error->code == G_CONVERT_ERROR_BAD_URI)
        {
            g_print("Conversion error from '%s' to '%s' (G_CONVERT_ERROR_BAD_URI)\n",from,to);
        } else if (error && error->code == G_CONVERT_ERROR_NOT_ABSOLUTE_PATH)
        {
            g_print("Conversion error from '%s' to '%s' (G_CONVERT_ERROR_NOT_ABSOLUTE_PATH)\n",from,to);
        } else
        {
            g_print("Conversion error from '%s' to '%s' (unknown : %d)\n",from,to,error->code);
        }
        
        if (error) g_error_free(error);
        return FALSE;
    } else
    {
        // No error
        if (error) g_error_free(error);
        g_free(temp);
        return TRUE;
    }
}
