/*
 *  Copyright (C) 2002,2003  David A Knight
 *  Copyright (C) 2001  Linus Walleij
 *  
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */



#include <config.h>

#include <ctype.h>

#include <string.h>

#include <glib.h>

#include "nomad-id3-filename.h"

/* Suffixes that cleanup_filename() strips from the end of a file name.
 * The comparison happens after the name has been lower-cased and its
 * underscores have been turned into spaces, which is why every entry is
 * lower case and uses spaces.  Entries are tried once each, in the order
 * listed here.  The list is NULL terminated. */
static const gchar *chomp_strings[] = {
	/* What a decoder is it that puts !s at the end of every filename
	 * and a N01Songtitle thing at the beginning of the file??? */
	"!s",
	/* Beatforge and friends - put your SIGs in the ID3 tag plz */
	" bf",
	" bftop",
	" xtd bftop",
	" nbd",
	" 12i",
	" 12inch",
	" oz",
	" wax",
	" atm",
	" we",
	" ii",
	/* From some stupid man that signifies "discs" with d1 for disc 1 etc */
	" d1",
	" d2",
	" d3",
	" d4",
	" d5",
	/* Remove trailing dash */
	"-",
	NULL
};

/* Flat list of (search, replacement) pairs, terminated by a single NULL.
 * beautify_string() applies the pairs in order as its last step, i.e.
 * after every word has been capitalized, so the search keys are written
 * in that capitalized form.
 *
 * The strings are handled as UTF-8 elsewhere in this file, so non-ASCII
 * characters must be spelled as UTF-8 byte sequences: o-diaeresis
 * (U+00F6) is "\xc3\xb6".  Note that a C hex escape swallows every hex
 * digit that follows it, so "\x00F6" is the single byte 0xF6, which is
 * not valid UTF-8. */
static const gchar *replacement_strings[] = {
	/* Some obvious mistakes I've found...
	 * add all things you find erroneously named in here. */
	"Dj ","DJ ",
	"Djs ","DJs ",
	"DJ - ","DJ ",
	"Ac - Dc ","AC-DC ",
	"Acdc ","AC-DC ",
	"Ebtg ","Everything But the Girl ",
	"Bjrk ","Bj\xc3\xb6rk ",
	"Bjrn ","Bj\xc3\xb6rn ",
	"Beastieboys ","Beastie Boys ",
	"C - Tec","C-Tec",
	"Ccr ","CCR ",
	"Atr ","ATR ",
	"Abba ","ABBA ",
	"Rmb ","RMB ",
	"Zz Top ","ZZ Top ",
	"A - Ha ","A-Ha ",
	"B - Charme ","B-Charme ",
	"C - Block ","C-Block ",
	"D - Tune ","D-Tune ",
	"E - Type ","E-Type ",
	"X - Perience ","X-Perience ",
	"Dee - Lite ","Dee-Lite ",
	"Run - Dmc ","Run-DMC ",
	"Run Dmc ","Run-DMC ",
	"Ann - Lie ","Ann-Lie ",
	"Cajsa - Stina ","Cajsa-Stina ",
	" - Rmx"," RMX",
	" Rmx"," RMX",
	" - Remix"," RMX",
	" Remix"," RMX",
	" Ft. "," ft ",
	" Feat "," ft ",
	" Feat. "," ft ",
	NULL
};

/* Overwrite, in place, every byte of str that equals f with r.
 * The scan stops at the terminating NUL. */
static void replacechars( gchar *str, gchar f, gchar r )
{
	gchar *cursor;

	for( cursor = str; *cursor != '\0'; ++ cursor ) {
		if( *cursor != f ) {
			continue;
		}
		*cursor = r;
	}
}

/*
 * Build a new string from str in which every non-overlapping occurrence
 * of f, found scanning left to right, is replaced by r.
 *
 * str is released with g_free() before returning, so it must be heap
 * allocated and must not be used by the caller afterwards.  The returned
 * string is newly allocated and owned by the caller.  Typical use:
 *
 *     s = replacestring( s, "from", "to" );
 *
 * An empty f matches nothing: the text comes back unchanged (still as a
 * fresh allocation).
 */
static gchar *replacestring( gchar *str, const gchar *f, const gchar *r )
{
	GString *temp;
	gchar *orig;
	gchar *loc;
	gssize len;

	temp = g_string_new( "" );
	orig = str;
	len = strlen( f );

	/* strstr() reports an empty needle at the current position, and
	 * with len == 0 the cursor would never advance, so the loop would
	 * spin forever; only scan when there is something to look for */
	while( len > 0 && ( loc = strstr( str, f ) ) ) {
		/* copy the text in front of the match, then the replacement */
		g_string_append_len( temp, str, loc - str );
		g_string_append( temp, r );
		/* resume right behind the match */
		str = loc + len;
	}
	/* whatever follows the last match */
	g_string_append( temp, str );

	g_free( orig );

	/* FALSE: drop the GString wrapper but keep and return its buffer */
	return g_string_free( temp, FALSE );
}

/* Count the '-' characters in string. */
static gint number_of_dashes( const gchar *string )
{
	const gchar *cursor;
	gint count;

	count = 0;
	for( cursor = string; *cursor != '\0'; ++ cursor ) {
		if( *cursor == '-' ) {
			++ count;
		}
	}

	return count;
}

/*
 * Strip a leading two-digit track number.
 *
 * If string is longer than three bytes, starts with two digits and the
 * third character is not a digit, the two digits are assumed to be a
 * track number: a new string holding the remainder, with surrounding
 * whitespace removed, is returned and the original is freed.  Otherwise
 * string itself is returned untouched.
 *
 * Characters are decoded one at a time, each only after the previous
 * one turned out to be a digit.  A digit is never the terminating NUL,
 * so stepping over it cannot move the cursor past the end of the string.
 */
static gchar *kill_tracknumber( gchar *string )
{
	gchar *tmp;

	if( strlen( string ) <= 3 ) {
		return string;
	}

	/* first character */
	if( ! g_unichar_isdigit( g_utf8_get_char( string ) ) ) {
		return string;
	}

	/* second character */
	tmp = g_utf8_next_char( string );
	if( ! g_unichar_isdigit( g_utf8_get_char( tmp ) ) ) {
		return string;
	}

	/* third character: a third digit means this is not a track number */
	tmp = g_utf8_next_char( tmp );
	if( g_unichar_isdigit( g_utf8_get_char( tmp ) ) ) {
		return string;
	}

	/* keep everything behind the two digits */
	tmp = g_strdup( tmp );
	tmp = g_strstrip( tmp );
	g_free( string );

	return tmp;
}

/*
 * Collapse every run of consecutive spaces in string into one space.
 *
 * Works in place in a single pass and never reallocates; the string can
 * only get shorter.  Runs of any length are reduced to a single space
 * (three spaces in a row become one, not two).
 */
static void stripdblspaces( gchar *string )
{
	const gchar *src;
	gchar *dst;

	dst = string;
	for( src = string; *src != '\0'; ++ src ) {
		/* skip a space if the last character kept was a space too */
		if( *src == ' ' && dst != string && *( dst - 1 ) == ' ' ) {
			continue;
		}
		*dst = *src;
		dst ++;
	}
	*dst = '\0';
}

/*
 * Upper-case, in place, the first letter of string and every letter
 * that directly follows a space.
 *
 * Only the ASCII letters a-z are changed.  All other bytes, including
 * the bytes of multi-byte UTF-8 characters, are left alone, so the
 * result does not depend on the current locale and a UTF-8 sequence is
 * never damaged.  Strings of any length are handled, including a single
 * character and the empty string.
 */
static void capitalize(gchar *string)
{
	gchar *tmp;

	for( tmp = string; *tmp != '\0'; ++ tmp ) {
		/* a word starts at the beginning or right behind a space */
		if( tmp != string && *( tmp - 1 ) != ' ' ) {
			continue;
		}
		if( *tmp >= 'a' && *tmp <= 'z' ) {
			*tmp = *tmp - 'a' + 'A';
		}
	}
}

/*
 * Some people insist on naming their files
 *     [Artist name] Trackname
 * Turn that into the more common
 *     Artist name - Trackname
 *
 * Applies only when string starts with '[' and contains a ']'.  In that
 * case a newly allocated string is returned and the original is freed;
 * otherwise string is returned unchanged.  Extra spaces around the
 * inserted " - " are left in place.
 */
static gchar *kill_braces( gchar *string )
{
	gchar *closing;
	gchar *joined;

	if( *string != '[' ) {
		return string;
	}

	closing = strchr( string, ']' );
	if( ! closing ) {
		return string;
	}

	/* end the bracketed part at ']' so that only the text between the
	 * brackets is copied in front of the separator */
	*closing = '\0';
	joined = g_strconcat( string + 1, " - ", closing + 1, NULL );
	g_free( string );

	return joined;
}

/* Turn every underscore in dir into a space, in place, and hand the
 * same pointer back. */
static gchar *cleanup_dirname( gchar *dir )
{
	gchar *cursor;

	for( cursor = dir; *cursor != '\0'; ++ cursor ) {
		if( *cursor == '_' ) {
			*cursor = ' ';
		}
	}

	return dir;
}

/*
 * Normalize a bare file name (no directory, no extension) before it is
 * beautified:
 *   - lower-case it,
 *   - turn underscores into spaces,
 *   - cut off every entry of chomp_strings found at the end,
 *   - drop a leading "n" followed by two digits ("n01songtitle"),
 *   - trim surrounding whitespace.
 *
 * file must be heap allocated; it is freed or reused here.  The
 * returned string is owned by the caller.
 */
static gchar *cleanup_filename( gchar *file )
{
	gssize len;
	gchar *temp;
	guint i;

	/* g_utf8_strdown() takes its length in bytes; -1 means "up to the
	 * terminating NUL".  Passing a character count would cut off names
	 * that contain multi-byte characters. */
	temp = g_utf8_strdown( file, -1 );
	g_free( file );
	file = temp;

	/* remove underscores */
	replacechars( file, '_', ' ' );

	/* Find some clear candidates of junk at the
	 * end of the filename and kill 'em */
	len = strlen( file );
	for( i = 0; chomp_strings[ i ]; ++ i ) {
		gssize clen;

		clen = strlen( chomp_strings[ i ] );

		/* a suffix longer than the name cannot match, and comparing
		 * anyway would read in front of the buffer */
		if( clen <= len &&
		    ! strcmp( chomp_strings[ i ], file + len - clen ) ) {
			len -= clen;
			file[ len ] = '\0';
		}
	}

	/* Another fix for the encoder that puts "!s" at the end of the
	 * file name: it also prefixes the name with 'N' and a two digit
	 * number.  Remove the whole prefix. */
	if( file[ 0 ] == 'n' && len > 4 ) {
		gchar *digit1;
		gchar *digit2;

		digit1 = g_utf8_next_char( file );
		digit2 = g_utf8_next_char( digit1 );

		if( g_unichar_isdigit( g_utf8_get_char( digit1 ) ) &&
		    g_unichar_isdigit( g_utf8_get_char( digit2 ) ) ) {
			/* continue behind the second digit */
			temp = g_strdup( g_utf8_next_char( digit2 ) );
			g_free( file );
			file = temp;
		}
	}

	file = g_strstrip( file );

	return file;
}

/* Run string through every (search, replacement) pair of a flat,
 * NULL terminated table, in table order.  Ownership follows
 * replacestring(): the input is consumed, the result is returned. */
static gchar *apply_replacements( gchar *string, const gchar **table )
{
	const gchar **pair;

	for( pair = table; pair[ 0 ]; pair += 2 ) {
		string = replacestring( string, pair[ 0 ], pair[ 1 ] );
	}

	return string;
}

/*
 * Tidy up a directory or file name so that it reads like a title:
 * normalize brackets, dashes and spacing, capitalize each word, put
 * short English function words back into lower case and finally apply
 * the hand-maintained corrections from replacement_strings.
 *
 * string must be heap allocated; it is consumed and the returned string
 * is owned by the caller.
 */
static gchar *beautify_string( gchar *string )
{
	/* bracket and dash spacing; the order of the entries matters */
	static const gchar *spacing_fixes[] = {
		/* no padding inside brackets */
		"( ", "(",
		" )", ")",
		"[ ", "[",
		" ]", "]",
		/* separate brackets that touch each other */
		")[", ") [",
		"](", "] (",
		/* one space outside every bracket (crude) */
		"(", " (",
		")", ") ",
		"[", " [",
		"]", "] ",
		/* merge double dashes, then surround dashes with spaces */
		"--", "-",
		"-", " - ",
		/* undo the excess the steps above created */
		"  (", " (",
		")  ", ") ",
		"  [", " [",
		"]  ", "] ",
		"] .", "].",
		") .", ").",
		NULL
	};
	/* Words that stay lower case inside a title: short prepositions
	 * plus "and" and "but".  "on" is left out on purpose because it
	 * can carry different meanings. */
	static const gchar *lowercase_words[] = {
		" The ", " the ",
		" To ", " to ",
		" At ", " at ",
		" For ", " for ",
		" Of ", " of ",
		" In ", " in ",
		" On Top Of ", " On top of ",
		" By ", " by ",
		" From ", " from ",
		" With ", " with ",
		" And ", " and ",
		" But ", " but ",
		NULL
	};

	/* "[Artist] Track" style names */
	string = kill_braces( string );

	/* more than three dashes: they are standing in for spaces */
	if( number_of_dashes( string ) > 3 ) {
		replacechars( string, '-', ' ' );
	}

	string = apply_replacements( string, spacing_fixes );

	/* squeeze repeated spaces, then trim both ends */
	stripdblspaces( string );
	string = g_strstrip( string );

	/* title case first, then take the exceptions back */
	capitalize( string );
	string = apply_replacements( string, lowercase_words );

	/* known misspellings and abbreviations */
	string = apply_replacements( string, replacement_strings );

	return string;
}

/*
 * Guess the artist name.
 *
 *   1. A directory named "Foo - Bar" is taken as artist - album: "Foo".
 *   2. Otherwise a file named "Foo - Bar" is taken as artist - title:
 *      "Foo".
 *   3. Otherwise the whole directory name, if there is one.
 *
 * dir may be NULL.  Returns a newly allocated string, or NULL when
 * nothing could be guessed; the caller frees it with g_free().
 */
static gchar *get_artist( const gchar *dir, const gchar *file )
{
	gchar **tmp;
	gchar *artist = NULL;

	if( dir ) {
		tmp = g_strsplit( dir, " - ", 0 );
	} else {
		tmp = NULL;
	}

	/* Splitting an empty string gives a vector that holds nothing but
	 * the terminating NULL, so tmp[ 0 ] has to be checked before
	 * tmp[ 1 ] may be looked at. */
	if( tmp && tmp[ 0 ] && tmp[ 1 ] ) {
		artist = g_strdup( tmp[ 0 ] );
	} else {
		g_strfreev( tmp );

		tmp = g_strsplit( file, " - ", 0 );
		if( tmp && tmp[ 0 ] && tmp[ 1 ] ) {
			artist = g_strdup( tmp[ 0 ] );
		} else if( dir ) {
			/* Using the directory name as it is, assuming it
			 * is the artist, is the best we can do... */
			artist = g_strdup( dir );
		}
	}
	g_strfreev( tmp );

	return artist;
}

/*
 * Guess the track title.
 *
 * For a file named "Foo - Bar" the second part, "Bar", is the title;
 * anything behind a further " - " is ignored.  Without a " - " the
 * whole file name is used.
 *
 * Returns a newly allocated string; the caller frees it with g_free().
 */
static gchar *get_title( const gchar *file )
{
	gchar **tmp;
	gchar *title = NULL;

	tmp = g_strsplit( file, " - ", 0 );

	/* Splitting an empty string gives a vector that holds nothing but
	 * the terminating NULL, so tmp[ 0 ] has to be checked before
	 * tmp[ 1 ] may be looked at. */
	if( tmp && tmp[ 0 ] && tmp[ 1 ] ) {
		title = g_strdup( tmp[ 1 ] );
	} else {
		/* Using the file name as it is, assuming it is the
		 * title, is the best we can do... */
		title = g_strdup( file );
	}
	g_strfreev( tmp );

	return title;
}

/*
 * Guess the album title.
 *
 * A directory named "Foo - Bar" is taken as artist - album, so "Bar" is
 * returned.  dir is only read, never modified.
 *
 * Returns a newly allocated string, or NULL when dir is NULL or does
 * not contain " - "; the caller frees it with g_free().
 */
static gchar *get_album( gchar *dir )
{
	gchar **tmp;
	gchar *album = NULL;

	if( ! dir ) {
		return NULL;
	}

	tmp = g_strsplit( dir, " - ", 0 );

	/* Splitting an empty string gives a vector that holds nothing but
	 * the terminating NULL, so tmp[ 0 ] has to be checked before
	 * tmp[ 1 ] may be looked at. */
	if( tmp && tmp[ 0 ] && tmp[ 1 ] ) {
		album = g_strdup( tmp[ 1 ] );
	}
	g_strfreev( tmp );

	return album;
}

/*
 * Guess artist, album and title from the last directory name and the
 * file name of uri, for files that carry no ID3 tag.
 *
 * uri:    location of the file.  It is first passed through
 *         nomad_id3_unescape_string(), which is defined outside this
 *         file (presumably it undoes URI escaping - confirm); if that
 *         returns NULL the uri is used as it is.  A NULL uri makes the
 *         function return without touching anything.
 * artist, album, title:
 *         each may be NULL if the caller is not interested.  A slot is
 *         only written when it currently holds NULL, so values the
 *         caller already has are kept.  Stored strings are newly
 *         allocated and must be released with g_free().  artist and
 *         album can remain NULL when nothing could be guessed.
 */
void nomad_id3_get_info_from_uri( const gchar *uri,
				 gchar **artist,
				 gchar **album,
				 gchar **title )
{
	gchar *file;
	gchar *ext;
	gchar *temp;
	gchar *dir;

	if( ! uri ) {
		return;
	}

	file = nomad_id3_unescape_string( uri, G_DIR_SEPARATOR_S );
	if( ! file ) {
		file = g_strdup( uri );
	}

	/* last separator: start of the file name */
	temp = strrchr( file, G_DIR_SEPARATOR );

	/* Remove the extension.  Only the file name itself is searched
	 * for the dot, otherwise a dot in a directory name would cut the
	 * path short whenever the file has no extension. */
	ext = strrchr( temp ? temp + 1 : file, '.' );
	if( ext ) {
		*ext = '\0';
	}

	/* get last dir name and filename */
	dir = NULL;
	if( temp && temp != file ) {
		gchar *start;

		/* hide the file name for a moment to find the separator
		 * in front of the last directory */
		*temp = '\0';
		start = strrchr( file, G_DIR_SEPARATOR );
		*temp = G_DIR_SEPARATOR;

		if( start ) {
			dir = g_strndup( start + 1,
					 temp - start - 1 );
		}
	}
	temp = g_path_get_basename( file );
	g_free( file );
	file = temp;

	if( dir ) {
		dir = cleanup_dirname( dir );
		dir = beautify_string( dir );
	}
	file = cleanup_filename( file );
	file = beautify_string( file );
	file = kill_tracknumber( file );

	if( artist && ! *artist ) {
		*artist = get_artist( dir, file );
	}
	if( album && ! *album ) {
		*album = get_album( dir );
	}
	if( title && ! *title ) {
		*title = get_title( file );
	}

	/* the get_*() helpers return copies, so the working strings are
	 * no longer needed */
	g_free( dir );
	g_free( file );
}
