/*************************************************************************/
/*                                                                       */
/*                Centre for Speech Technology Research                  */
/*                     University of Edinburgh, UK                       */
/*                       Copyright (c) 1996,1997                         */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission to use, copy, modify, distribute this software and its    */
/*  documentation for research, educational and individual use only, is  */
/*  hereby granted without fee, subject to the following conditions:     */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*  This software may not be used for commercial purposes without        */
/*  specific prior written permission from the authors.                  */
/*                                                                       */
/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*                      Author :  Alan W Black                           */
/*                      Date   :  August 1996                            */
/*-----------------------------------------------------------------------*/
/*                                                                       */
/* Phrase break prediction                                               */
/*                                                                       */
/*=======================================================================*/
#include <stdio.h>
#include "festival.h"
#include "modules.h"
#include "fngram.h"

// Forward declarations of the file-local helpers.
//
// The three phrasing_* functions are the alternative strategies that
// FT_Phrasify_Utt() dispatches to.
static void phrasing_none(EST_Utterance &u);
static void phrasing_by_cart(EST_Utterance &u);
static void phrasing_by_probmodels(EST_Utterance &u);
// bb_candlist and bb_npath are the two callbacks handed to the
// EST_Viterbi_Decoder constructed in phrasing_by_probmodels().
static EST_VTCandidate *bb_candlist(EST_Stream_Item &s);
static EST_VTPath *bb_npath(EST_VTPath *p,EST_VTCandidate *c);
// Break ngram probability lookup used by bb_npath().
static double find_b_prob(EST_VTPath *p,int n,int *state);

// File-scope state shared between phrasing_by_probmodels() and the
// Viterbi callbacks it installs (the callbacks take no user-data
// argument, so their context is passed through these variables).

// Used in various default value cases
// Vocabulary indices of the "B", "BB" and "NB" tags in bb_ngram; they
// are looked up again on every call to phrasing_by_probmodels().
// NOTE(review): BB_word is assigned but not read in this file.
static int B_word = 0;
static int BB_word = 0;
static int NB_word = 0;
// Vocabulary indices (in bb_pos_ngram) of the padding tags named by the
// Lisp variables pos_p_start_tag, pos_pp_start_tag and pos_n_start_tag.
// They keep their previous value when the Lisp variable is NIL.
// NOTE(review): pos_pp_start_tag is assigned but not read in this file.
static int pos_p_start_tag = 0;
static int pos_pp_start_tag = 0;
static int pos_n_start_tag = 0;

// gscale_s: factor applied to the ngram probability of "B" in
// find_b_prob(); taken from break_gram_scale_s (default 1.0).
static double gscale_s = 1.0;
// gscale_p: taken from break_gram_scale_p (default 0.0).
// NOTE(review): it does not contribute to any score computed in this
// file as it stands -- confirm whether that is intended.
static double gscale_p = 0.0;
// Ngram over break tags (named by break_ngram_name).
static EST_Ngrammar *bb_ngram = 0;
// Ngram over part of speech tags plus a break tag (named by
// break_pos_ngram_name).
static EST_Ngrammar *bb_pos_ngram = 0;
// List of candidate break tags (value of break_tags); only held, and
// gc protected, for the duration of phrasing_by_probmodels().
static LISP bb_tags = NIL;
// Value of break_pos_cart_tree; only tested against NIL in this file.
static LISP bb_pos_tree = NIL;
// Utterance currently being phrased, for feature lookups in bb_candlist().
static EST_Utterance *bb_utt = 0;

LISP FT_Phrasify_Utt(LISP utt)
{
    // Predict and add phrasing to an utterance
    //
    // The method is selected by the "Phrase_Method" parameter:
    //   NIL            all words go in one phrase
    //   "prob_models"  Viterbi search over POS and break ngrams
    //   "cart_tree"    a break is predicted for each word by a CART tree
    // Any other value is reported on cerr and raises a festival error.
    // An utterance that already has a "Phrase" stream is left untouched.
    // Returns the utterance it was given.
    EST_Utterance *u = GETUTTVAL(utt);
    LISP phrase_method = ft_get_param("Phrase_Method");

    *cdebug << "Phrasify module\n";

    if (u->stream_present("Phrase"))
	return utt;               // already specified

    if (phrase_method == NIL)
    {
	phrasing_none(*u);        // all one phrase
	return utt;
    }

    if (streq("prob_models",get_c_string(phrase_method)))
	phrasing_by_probmodels(*u);
    else if (streq("cart_tree",get_c_string(phrase_method)))
	phrasing_by_cart(*u);
    else
    {
	// The method name is quoted on both sides in the message
	cerr << "PHRASIFY: unknown phrase method \"" <<
	    get_c_string(phrase_method) << "\"" << endl;
	festival_error();
    }

    return utt;
}

static void phrasing_none(EST_Utterance &u)
{
    // Trivial phrasing: every word is linked to one single phrase.
    // The final word gets pbreak "B" and the phrase is named "4".
    // With no words the Phrase stream is created but left empty.
    u.create_stream("Phrase");

    EST_Stream_Item *word = (u.stream("Word")).head();
    if (word == 0)
	return;

    EST_Stream_Item *phrase = add_phrase(u);

    // Link everything up to, but not including, the last word
    while (next(word) != 0)
    {
	link(*word,*phrase);
	word = next(word);
    }

    // The last word closes the phrase
    link(*word,*phrase);
    word->set_feature("pbreak","B");
    phrase->set_name("4");
}

static void phrasing_by_cart(EST_Utterance &u)
{
    // Phrasing by decision tree: the tree held in the Lisp variable
    // phrase_cart_tree is applied to each word in turn and its answer
    // stored as the word's "pbreak" feature.  A prediction of "B" or
    // "BB" ends the current phrase, which takes the prediction as its
    // name; the next word (if any) starts a new phrase.
    u.create_stream("Phrase");
    LISP cart = siod_get_lval("phrase_cart_tree","no phrase cart tree");

    EST_Stream_Item *current_phrase = 0;
    EST_Stream_Item *word = (u.stream("Word")).head();

    while (word != 0)
    {
	if (current_phrase == 0)
	    current_phrase = add_phrase(u);
	link(*word,*current_phrase);

	EST_Val predicted = wagon_predict(u,*word,cart);
	word->set_feature("pbreak",predicted.string());

	if ((predicted == "B") || (predicted == "BB"))
	{   // phrase boundary after this word
	    current_phrase->set_name((EST_String)predicted);
	    current_phrase = 0;
	}

	word = next(word);
    }
}

static void phrasing_by_probmodels(EST_Utterance &u)
{
    // Predict phrasing using POS and prob models of B distribution 
    //
    // Steps:
    //   1. load the models and scaling factors named by Lisp variables
    //      into the file-scope state used by the Viterbi callbacks
    //   2. cache each word's POS vocabulary index as "pos_index"
    //   3. run a Viterbi search (bb_candlist / bb_npath) which leaves
    //      the chosen break index in "pbreak_index" on each word
    //   4. turn the predicted breaks into a "Phrase" stream
    EST_Stream_Item *w,*phr=0;
    EST_String pbreak;
    int num_states;
    LISP bb_pos_name,bb_name,l1,lgscale;

    bb_pos_tree = siod_get_lval("break_pos_cart_tree",NULL);
    if (bb_pos_tree == NIL)
    {
	bb_pos_name = siod_get_lval("break_pos_ngram_name",
				    "no break pos ngram given");
	if ((bb_pos_ngram = get_ngram(get_c_string(bb_pos_name))) == 0)
	{
	    cerr << "PHRASIFY: no ngram called \"" <<
		get_c_string(bb_pos_name) << "\" defined." << endl;
	    festival_error();
	}
    }
    else if (bb_pos_ngram == 0)
    {
	// Everything below (and bb_candlist) dereferences bb_pos_ngram
	// unconditionally.  When a cart tree is given the ngram is not
	// loaded above, so unless an earlier call left one in place we
	// must stop here rather than dereference a null pointer.
	cerr << "PHRASIFY: break_pos_cart_tree is set but no break pos "
	    << "ngram has been loaded." << endl;
	festival_error();
    }

    // Optional scaling factors, with defaults when the variable is NIL
    lgscale = siod_get_lval("break_gram_scale_s",NULL);
    if (lgscale == NIL)
	gscale_s = 1.0;
    else
	gscale_s = get_c_float(lgscale);
    lgscale = siod_get_lval("break_gram_scale_p",NULL);
    if (lgscale == NIL)
	gscale_p = 0.0;
    else
	gscale_p = get_c_float(lgscale);
	
    bb_name = siod_get_lval("break_ngram_name","no break ngram given");
    if ((bb_ngram = get_ngram(get_c_string(bb_name))) == 0)
    {
	cerr << "PHRASIFY: no ngram called \"" <<
	    get_c_string(bb_name) << "\" defined." << endl;
	festival_error();
    }

    // bb_tags is held in a C global for the duration of the search so
    // it is registered with the garbage collector until the end of
    // this function
    bb_tags = siod_get_lval("break_tags","no break tag list given");
    gc_protect(&bb_tags);

    // Padding tags used by bb_candlist at the edges of the utterance;
    // each keeps its previous value when the variable is NIL
    l1 = siod_get_lval("pos_p_start_tag",NULL);
    if (l1 != NIL) 
	pos_p_start_tag = bb_pos_ngram->get_vocab_word(get_c_string(l1));
    l1 = siod_get_lval("pos_pp_start_tag",NULL);
    if (l1 != NIL) 
	pos_pp_start_tag = bb_pos_ngram->get_vocab_word(get_c_string(l1));
    l1 = siod_get_lval("pos_n_start_tag",NULL);
    if (l1 != NIL) 
	pos_n_start_tag = bb_pos_ngram->get_vocab_word(get_c_string(l1));
    for (w=(u.stream("Word")).head(); w != 0; w = next(w))
    {   // Set up tag index for pos ngram
	w->set_feature("pos_index",bb_pos_ngram->
		       get_vocab_word(w->feature("pos").string()));
    }
    B_word = bb_ngram->get_vocab_word("B");
    NB_word = bb_ngram->get_vocab_word("NB");
    BB_word = bb_ngram->get_vocab_word("BB");

    bb_utt = &u;  // used below

    num_states = bb_ngram->states();
    EST_Viterbi_Decoder v(bb_candlist,bb_npath,num_states);

    v.initialise(u.stream("Word"));
    v.search();
    v.result("pbreak_index");

    // Given predicted break, go through and add phrases 
    LISP phrase_type = siod_get_lval("phrase_type_tree",NULL);
    u.create_stream("Phrase");
    for (w=(u.stream("Word")).head(); w != 0; w = next(w))
    {
	// Map the vocabulary index chosen by the search back to its tag
	w->set_feature("pbreak",bb_ngram->
		       get_vocab_word(w->feature("pbreak_index").Int()));
	if (phr == 0)
	    phr = add_phrase(u);
	link(*phr,*w);
	if (phrase_type != NIL)
	{
	    EST_Val npbreak = wagon_predict(u,*w,phrase_type);
	    w->set_feature("pbreak",npbreak.string());  // may reset to BB
	}
	pbreak = w->feature("pbreak");
	if (pbreak == "B")
	    w->set_feature("blevel",4);
	else if (pbreak == "mB")
	    w->set_feature("blevel",3);
	if ((pbreak == "B") || (pbreak == "BB"))
	{   // this word ends the phrase, which is named after the break
	    phr->set_name((EST_String)pbreak);
	    phr = 0;
	}
    }

    gc_unprotect(&bb_tags);
    bb_tags = NIL;
}

static EST_VTCandidate *bb_candlist(EST_Stream_Item &s)
{
    // Find candidates with a priori probabilities
    //
    // Candidate callback given to the Viterbi decoder: builds, for the
    // word s, a list (chained through ->next) of possible break tags,
    // each with a log probability in ->score and the tag's bb_ngram
    // vocabulary index in ->name.  Relies on the file-scope state set
    // up by phrasing_by_probmodels().  The candidates are allocated
    // with new and returned to the caller.
    EST_IVector window(bb_pos_ngram->order());
    int tag;

    // Fill the POS context; the layout depends on the ngram order and
    // the last slot is left for the break tag under consideration.
    // At the ends of the utterance the padding tags are used instead.
    if (bb_pos_ngram->order() == 4)
    {   // previous, current, next, [break tag]
	window(1) = s.feature("pos_index").Int();
	if (prev(&s) != 0)
	    window(0) = prev(&s)->feature("pos_index").Int();
	else
	    window(0) = pos_p_start_tag;
	if (next(&s) != 0)
	    window(2) = next(&s)->feature("pos_index").Int();
	else
	    window(2) = pos_n_start_tag;
    }
    else if (bb_pos_ngram->order() == 3)
    {   // current, next, [break tag]
	window(0) = s.feature("pos_index").Int();
	if (next(&s) != 0)
	    window(1) = next(&s)->feature("pos_index").Int();
	else
	    window(1) = pos_n_start_tag;
    }
    else if (bb_pos_ngram->order() == 5)
    {   // This is specific for some set of pos tagsets
	// next-but-one, previous, current, next, [break tag]
	// (note the next-but-one word goes in slot 0, and is 0 when
	// there is no such word)
	window(2) = s.feature("pos_index").Int();
	if (prev(&s) != 0)
	{
	    window(1) = prev(&s)->feature("pos_index").Int();
	}
	else
	{
	    window(1) = pos_p_start_tag;
	}
	if (next(&s) != 0)
	{
	    window(3) = next(&s)->feature("pos_index").Int();
	    if (next(next(&s)) != 0)
		window(0) = next(next(&s))->feature("pos_index").Int();
	    else
		window(0) = 0;
	}
	else
	{
	    window(3) = pos_n_start_tag;
	    window(0) = 0;
	}
    }
    else
    {
	cerr << "PHRASIFY: can't deal with ngram of size " <<
	    bb_pos_ngram->order() << endl;
	festival_error();
    }
    double prob=1.0;
    EST_VTCandidate *all_c = 0;
    EST_Val labelled_brk = ffeature(*bb_utt,s,"Token.pbreak");

    // The first two cases offer a single candidate and ignore the
    // POS window built above.
    // NOTE(review): the Token.addr comparison presumably tests that s
    // is the last word of its token -- confirm.
    if ((labelled_brk != "0") &&
	(ffeature(*bb_utt,s,"Token.addr") !=
	 ffeature(*bb_utt,s,"n.Token.addr")))
    {   // there is a labelled break on the token so respect it 
	EST_VTCandidate *c = new EST_VTCandidate;
	c->s = &s;
	c->name = bb_ngram->get_vocab_word(labelled_brk.string());
	c->score = log(0.95);  // very very likely, but not absolute
	c->next = all_c;
	all_c = c;  // but then if you give only one option ...
    }
    else if (s.name() == ".end_utt")
    {   // This is a quick check to see if forcing "." to B is worth it
	EST_VTCandidate *c = new EST_VTCandidate;
	c->s = &s;
	c->name = B_word;
	c->score = log(0.95);  // very very likely, but not absolute
	c->next = all_c;
	all_c = c;
    }
    else if (siod_get_lval("break_non_bayes",NULL) != NIL)
    {
        /* This uses the "wrong" formula to extract the  probability      */
        /* Extract P(B | context) rather than P(context | B) as below     */
        /* This gives worse results as well as not following Bayes        */
        /* equations                                                      */
	EST_VTCandidate *c;
	LISP l;
	// One candidate per tag in break_tags
	for (l=bb_tags; l != 0; l=cdr(l))
	{
	    c = new EST_VTCandidate;
	    c->s = &s;
	    tag = bb_ngram->get_vocab_word(get_c_string(car(l)));
	    c->name = tag;
	    window(bb_pos_ngram->order()-1) = tag;
	    const EST_DiscreteProbDistribution &pd = 
		bb_pos_ngram->prob_dist(window);
	    if (pd.samples() == 0)
	    {   // no data for this context: fixed fallback values
		if (tag == B_word)
		    prob = 0.2;
		else
		    prob = 0.8;
	    }
	    else
		prob = pd.probability(tag);
	    // avoid log(0) by flooring to a very small probability
	    if (prob == 0)
		c->score = log(0.0000001);
	    else
		c->score = log(prob);
	    c->next = all_c;
	    all_c = c;
	}
    }
    else
    {   // Standard Bayes model
	EST_VTCandidate *c;
	LISP l;
	int taddr;
	// One candidate per tag in break_tags
	for (l=bb_tags; l != 0; l=cdr(l))
	{
	    c = new EST_VTCandidate;
	    c->s = &s;
	    tag = bb_ngram->get_vocab_word(get_c_string(car(l)));
	    c->name = tag;
	    window(bb_pos_ngram->order()-1) = tag;
	    prob = bb_pos_ngram->reverse_probability(window);

	    // If this word came from inside a token reduce the
	    // probability of a break

	    if (((taddr = ffeature(*bb_utt,s,"Token.addr")) != 0) &&
		(taddr == ffeature(*bb_utt,s,"n.Token.addr").Int()))
//		(ffeature(*bb_utt,s,"n.Token.Word:num").Int() < 4))
	    {
		// The scale comes from the word's pbreak_scale feature,
		// falling back to 0.5 when that is 0.  B is scaled
		// directly; for any other tag it is the complement of
		// the probability that is scaled.
		float weight = ffeature(*bb_utt,s,"pbreak_scale");
		if (weight == 0) weight = 0.5;
		if (tag == B_word)
		    prob *= weight;
		else
		    prob = 1.0-((1.0-prob)*weight);
	    }
	    // avoid log(0) by flooring to a very small probability
	    if (prob == 0)
		c->score = log(0.0000001);
	    else
		c->score = log(prob);
	    c->next = all_c;
	    all_c = c;
	}
    }

    return all_c;
}

static EST_VTPath *bb_npath(EST_VTPath *p,EST_VTCandidate *c)
{
    // Path extension callback given to the Viterbi decoder: make a new
    // path that extends p (which may be 0 at the start) with candidate c.
    //
    // The step score is the candidate's own log score plus the log of
    // the break ngram probability of c's tag given the tags on p.  It
    // is stored as the "lscore" feature and accumulated into the path
    // score.  The ngram state reached is written to np->state by
    // find_b_prob().
    EST_VTPath *np = new EST_VTPath;

    np->c = c;
    np->from = p;

    const int n = c->name.Int();
    const double prob = find_b_prob(p,n,&np->state);

    // avoid log(0) by flooring to a very small probability
    double lprob;
    if (prob == 0)
	lprob = log(0.00000001);
    else
	lprob = log(prob);

    // NOTE: gscale_p is deliberately left out of the sum so the scoring
    // is unchanged; a scaled variant (c->score + gscale_p) used to be
    // computed here and was immediately overwritten by the raw score.
    const double lang_prob = c->score;

    np->set_feature("lscore",lang_prob+lprob);
    if (p==0)
	np->score = (lang_prob+lprob);
    else
	np->score = (lang_prob+lprob) + p->score;

    return np;
}

static double find_b_prob(EST_VTPath *p,int n,int *state)
{
    // Probability, from the break ngram, of break tag n following the
    // tags already chosen along path p.  Also writes to *state the
    // value bb_ngram->predict() reports for the shifted context.
    const int order = bb_ngram->order();
    EST_IVector window(order);
    EST_VTPath *hist = p;
    int padded = FALSE;

    // Fill the history slots from most recent to oldest
    for (int slot = order-2; slot >= 0; slot--)
    {
	if ((hist != 0) && (hist->c != 0))
	{
	    window(slot) = hist->c->name.Int();
	    hist = hist->from;
	    continue;
	}

	// No more previous context: the first missing slot is taken
	// to be B and every older one NB
	if (padded)
	    window(slot) = NB_word;
	else
	{
	    window(slot) = B_word;
	    padded = TRUE;
	}
    }
    window(order-1) = n;

    double prob = 0;
    const EST_DiscreteProbDistribution &pd = bb_ngram->prob_dist(window);
    if (pd.samples() != 0)
	prob = (double)pd.probability(n);

    // This is too specific
    if (n == B_word)
	prob *= gscale_s;

    // Going to have to fix this sometime
    // Need the value of the state I'm going to 
    // Shift the window left by one and ask the ngram for the state;
    // the probability predict() returns is not used
    for (int k = 0; k < order-1; k++)
	window(k) = window(k+1);
    double unused_prob;
    bb_ngram->predict(window,&unused_prob,state);

    return prob;
}

EST_Stream_Item *add_phrase(EST_Utterance &u)
{
    // Append a fresh item, named "phrase", to the utterance's "Phrase"
    // stream and return a pointer to the stream's tail afterwards.
    // The stream must already exist; callers here create it first.
    EST_Stream_Item new_phrase;

    new_phrase.init("Phrase");
    new_phrase.set_name("phrase");
    u.stream("Phrase").append(new_phrase);

    // Return the stream's tail rather than the address of the local
    return u.stream("Phrase").tail();
}

