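/*
 * Generate conversion candidates from the structure of a segment.
 * make_candidates() is called from the context management code.
 *
 * Candidates are generated in the following steps:
 * (1) generate candidates through proc_splitter_info() from the
 *     parts of speech assigned by the splitter
 * (2) generate the hiragana-only and katakana-only candidates
 * (3) force a candidate by interpreting the last character as a particle
 */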
/*
 * Funded by IPA Exploratory Software Project (Mitou) 2001 9/30
 * Copyright (C) 2000-2005 TABATA Yusuke
 * Copyright (C) 2004-2005 YOSHIDA Yuichi
 * Copyright (C) 2002 UGAWA Tomoharu
 *
 * $Id: compose.c,v 1.25 2005/08/19 04:20:25 oxy Exp $
 */
/*
  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
 */
#if 0		/* Patched by G-HAL */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <anthy/dic.h>
#include <anthy/splitter.h>
#include <anthy/segment.h>
#include "wordborder.h"
#else
#if defined(HAVE_CONFIG_H)
# include "config.h"
#endif

#if defined(HAVE_STDLIB_H)
# include <stdlib.h>
#endif
#if defined(HAVE_MALLOC_H)
# include <malloc.h>
#endif
#if defined(HAVE_STDIO_H)
# include <stdio.h>
#endif
#if defined(HAVE_STRING_H)
# include <string.h>
#endif
#if defined(HAVE_STRINGS_H)
# include <strings.h>
#endif

#include "anthy/settings.h"	/* Patched by G-HAL, Sun,02 Nov,2008 */
#include "anthy/xstr.h"		/* Patched by G-HAL, Sat,01 Nov,2008 */
#include "anthy/cand_ent_score.h"	/* Patched by G-HAL, Sat,17 Oct,2009 */
#include "anthy/dic.h"
#include "anthy/splitter.h"
#include "anthy/segment.h"
#include "wordborder.h"
#endif


static struct cand_ent *
alloc_cand_ent(void)
{
  struct cand_ent *ce;
  ce = (struct cand_ent *)malloc(sizeof(struct cand_ent));
  ce->nr_words = 0;
  ce->elm = NULL;
  ce->mw = NULL;
  ce->core_elm_index = -1;
  ce->dep_word_hash = 0;
  return ce;
}

/*
 * Duplicate a candidate.
 *
 * The copy gets its own string (obtained from anthy_xstr_dup_str()) and
 * its own cand_elm array whose elements are copied one by one; the mw
 * pointer is copied as-is, so it refers to the same meta word as the
 * source entry.
 */
static struct cand_ent *
dup_candidate(struct cand_ent *ce)
{
  struct cand_ent *copy = alloc_cand_ent();
  int idx = 0;

  /* plain members */
  copy->flag = ce->flag;
  copy->score = ce->score;
  copy->mw = ce->mw;
  copy->core_elm_index = ce->core_elm_index;
  copy->dep_word_hash = ce->dep_word_hash;
  copy->nr_words = ce->nr_words;

  /* candidate string */
  copy->str.str = anthy_xstr_dup_str(&ce->str);
  copy->str.len = ce->str.len;

  /* per-word elements */
  copy->elm = (struct cand_elm*) malloc(sizeof(struct cand_elm)*ce->nr_words);
  while (idx < ce->nr_words) {
    copy->elm[idx] = ce->elm[idx];
    idx++;
  }
  return copy;
}

/**
 * Append candidate `ce` to the candidate array of segment `seg`.
 *
 * Callers in this file hand the entry over and do not release it after the
 * call.  When the array cannot be grown, the segment is left exactly as it
 * was (seg->cands and seg->nr_cands still describe the old array) and `ce`
 * is released here so that it is not leaked.
 *
 * When SPLITTER_DEBUG_CAND is set in anthy_splitter_debug_flags(), the
 * appended candidate is printed followed by a newline.
 */
static void
push_back_candidate(struct seg_ent *seg, struct cand_ent *ce)
{
  struct cand_ent **grown;

  /* Grow into a temporary so a failed realloc() does not lose the array. */
  grown = (struct cand_ent **)
    realloc(seg->cands, sizeof(struct cand_ent *) * (seg->nr_cands + 1));
  if (!grown) {
    anthy_release_cand_ent(ce);
    return;
  }
  seg->cands = grown;
  seg->cands[seg->nr_cands] = ce;
  seg->nr_cands++;

  if (anthy_splitter_debug_flags() & SPLITTER_DEBUG_CAND) {
    anthy_print_candidate(ce);
    printf("\n");
  }
}

/*
 * Add a "guessed" candidate for the segment.
 *
 * Applies only when the reading has at least two characters and its last
 * character has the XCT_DEP character type.  The reading is passed through
 * anthy_xstr_hira_to_kata() and the last character of the result is put
 * back to the original one.  If that string differs from the reading it is
 * pushed as a candidate flagged CEF_GUESS | CEF_GUESS_KATAcoreHIRAdep plus
 * the configured extra guess flag; if it is identical, nothing is added.
 *
 * Patched by G-HAL, Sun,02 Nov,2008 (skip the candidate when the
 * conversion changes nothing).
 */
static void
push_back_guessed_candidate(struct seg_ent *seg)
{
  xchar last;
  xstr *kata;
  struct cand_ent *ce;

  if (seg->str.len < 2) {
    return ;
  }
  /* Is the last character a dependent-word (XCT_DEP) character? */
  last = seg->str.str[seg->str.len - 1];
  if (!(anthy_get_xchar_type(last) & XCT_DEP)) {
    return ;
  }

  /* Convert the reading, then restore the original last character. */
  kata = anthy_xstr_hira_to_kata(&seg->str);
  kata->str[kata->len-1] = last;

  if (0 != anthy_xstrcmp(&(seg->str),kata)) {
    /*
     * Allocate the entry only when it is actually going to be pushed;
     * allocating it up front leaked it whenever the strings were equal.
     */
    ce = alloc_cand_ent();
    if (ce) {
      ce->str.str = anthy_xstr_dup_str(kata);
      ce->str.len = kata->len;
      ce->flag = CEF_GUESS | CEF_GUESS_KATAcoreHIRAdep | anthy_settings.anthy_mode.extra_flag.guess;
      push_back_candidate(seg, ce);
    }
  }
  anthy_free_xstr(kata);
}

/**
 * Recursively assign one word per call to the elements of a candidate and
 * push every completed candidate onto the segment.
 *
 * seg   segment whose reading (seg->str) is being converted
 * ce    partially built candidate; ce->str holds the text assembled so far
 * from  offset into seg->str.str where element `n` starts
 * n     index of the element (ce->elm[n]) to assign in this call
 *
 * Returns a count of completed candidates reached from this call; the
 * value is only used here to decide whether the unconverted fallback is
 * needed.
 *
 * Note that this function modifies its argument `ce` in place: the
 * terminating case appends to ce->str, and the conjugation type of
 * ce->elm[n].wt is reset to CT_NONE.  Callers pass a scratch entry and
 * release it afterwards.
 */
static int
enum_candidates(struct seg_ent *seg,
		struct cand_ent *ce,
		int from, int n)
{
  int i, p;
  struct cand_ent *cand;
  int nr_cands = 0;
  int pos;

  if (n == ce->mw->nr_parts) {
    /* Every element has been assigned: the candidate is complete. */
    /* Append the rest of the segment (from `from` up to seg->len), which
       was not covered by any element, to the candidate string. */
    xstr tail;
    tail.len = seg->len - from;
    tail.str = &seg->str.str[from];
    anthy_xstrcat(&ce->str, &tail);
    if (ce->str.str && (0 < ce->str.len)) { /* push only a non-empty string (guards against an empty result) */
      push_back_candidate(seg, dup_candidate(ce));
    }
    return 1;
  }

  p = anthy_get_nr_dic_ents(ce->elm[n].se, &ce->elm[n].str);

  /* A word type has been assigned to this element, so try every
     dictionary entry whose word type matches it. */
  for (i = 0; i < p; i++) {
    wtype_t wt;
    if (anthy_get_nth_dic_ent_is_compound(ce->elm[n].se, i)) {
      /* compound entries are not used here */
      continue;
    }
    anthy_get_nth_dic_ent_wtype(ce->elm[n].se, &ce->elm[n].str, i, &wt);

   #if 0		/* Patched by G-HAL, Fri,07 Oct,2011 */
    ce->elm[n].wt = anthy_get_wtype_with_ct(ce->elm[n].wt, CT_NONE);
   #else
    anthy_wtype_set_ct( &(ce->elm[n].wt), CT_NONE );
   #endif
    if (anthy_wtype_include(ce->elm[n].wt, wt)) {
     #if 0	/* Patched by G-HAL, Thu,27 Nov,2008, Mon,01 Dec,2008 */
      xstr word, yomi;

      yomi.len = ce->elm[n].str.len;
      yomi.str = &seg->str.str[from];
      cand = dup_candidate(ce);
      anthy_get_nth_dic_ent_str(cand->elm[n].se,
				&yomi, i, &word);
     #else
      xstr word;
      /* reading of this element: elm[n].str.len characters starting at `from` */
      xstr yomi = { .str = &(seg->str.str[from]), .len = ce->elm[n].str.len };
      unsigned int extra_flag;

      cand = dup_candidate(ce);
      anthy_get_nth_dic_ent_str_with_flagret( cand->elm[n].se,
				&yomi, i, &word, &extra_flag );
      /* merge the flag bits reported for this dictionary entry */
      cand->flag |= extra_flag;
     #endif
      cand->elm[n].nth = i;
      cand->elm[n].id = anthy_xstr_hash(&word);

      /* Append the word itself to the candidate string. */
      anthy_xstrcat(&cand->str, &word);
     #if 1	/* Patched by G-HAL, Tue,20 Oct,2009, Sun,01 Nov,2009, Sat,07 Nov,2009, Fri,13 Nov,2009 */
      {
	/* Hash the reading covered by elements 0..n followed by the
	   candidate text built so far, each terminated by '\0', and store
	   it in uc_id_tmp.  The reading starts at the head of seg->str. */
	xstr yomi_all = { .str = seg->str.str, .len = 0 };
	uint32_t id_yomi;
	uint32_t id_base;
	int j;
	for (j = n; 0 <= j; --j) {
	  yomi_all.len += ce->elm[j].str.len;
	}
	if (0 < yomi_all.len) {
	  id_yomi = anthy_hash_uint32_update( anthy_hash_xstr_start( &yomi_all ), '\0' );
	  id_base = anthy_hash_uint32_update( anthy_hash_xstr_update( id_yomi, &(cand->str) ), '\0' );
	} else {
	  id_base = 0;
	}
	cand->elm[n].uc_id_tmp = id_base;
      }
     #endif
      /* word.str was handed to us by the dictionary lookup; it has been
         copied into cand->str above, so release it here. */
      free(word.str);
      /* Recurse to assign the remaining elements. */
      nr_cands += enum_candidates(seg, cand,
				  from + yomi.len,
				  n+1);
      anthy_release_cand_ent(cand);
    }
  }

  /* When nothing matched, or the part of speech is invalid/none, keep this
     element unconverted (copy its reading) and go on to the next element. */
  pos = anthy_wtype_get_pos(ce->elm[n].wt);
  if (nr_cands == 0 || pos == POS_INVAL || pos == POS_NONE) {
    xstr xs;
    xs.len = ce->elm[n].str.len;
    xs.str = &seg->str.str[from];
    cand = dup_candidate(ce);
    cand->elm[n].nth = -1;
    cand->elm[n].id = -1;
    cand->elm[n].uc_id_tmp = 0;	/* Patched by G-HAL, Tue,20 Oct,2009 */
    anthy_xstrcat(&cand->str, &xs);
    /* NOTE(review): this assigns rather than adds, so any count gathered
       by the loop above is discarded on this path -- confirm intended. */
    nr_cands = enum_candidates(seg,cand,
			       from + xs.len,
			       n + 1);
    anthy_release_cand_ent(cand);
    return nr_cands;
  }

  return nr_cands;
}

/**
 * Add candidates for dictionary entries whose reading is the whole
 * segment (one word, compound entries excluded).
 *
 * Only entries whose conjugation type is CT_SYUSI or CT_NONE are used.
 * Each candidate takes the string returned by the dictionary lookup and
 * carries the flag bits reported by the lookup plus CEF_SINGLEWORD.
 */
static void
push_back_singleword_candidate(struct seg_ent *seg,
			       int is_reverse)
{
  seq_ent_t ent;
  struct cand_ent *cand;
  wtype_t wt;
  xstr word;
  int nr_ents;
  int idx;

  ent = anthy_get_seq_ent_from_xstr(&seg->str, is_reverse);
  nr_ents = anthy_get_nr_dic_ents(ent, &seg->str);

  /* Walk over every dictionary entry for this reading. */
  for (idx = 0; idx < nr_ents; ++idx) {
    int ct;

    if (anthy_get_nth_dic_ent_is_compound(ent, idx)) {
      continue;
    }

    /* Fetch the word type and keep only CT_SYUSI / CT_NONE entries. */
    anthy_get_nth_dic_ent_wtype(ent, &seg->str, idx, &wt);
    ct = anthy_wtype_get_ct(wt);
    if (ct != CT_SYUSI && ct != CT_NONE) {
      continue;
    }

    cand = alloc_cand_ent();
    /* Patched by G-HAL, Sun,02 Nov,2008: the lookup also fills in the flag */
    anthy_get_nth_dic_ent_str_with_flagret(ent, &seg->str, idx, &word, &(cand->flag));
    cand->str.len = word.len;
    cand->str.str = word.str;
    cand->flag |= CEF_SINGLEWORD;
    push_back_candidate(seg, cand);
  }
}

/*
 * Add the unconverted candidates for a segment.
 *
 * - If the reading is unchanged by anthy_xstr_hira_to_kata(), a single
 *   candidate holding the reading is added with CEF_NONE plus the
 *   configured "kigou" extra flag.
 * - Otherwise two candidates are added: the reading itself (CEF_HIRAGANA)
 *   and its converted form (CEF_KATAKANA), each with its configured
 *   extra flag.
 * - Finally, if anthy_conv_half_wide() yields a string, it is added with
 *   CEF_NONE plus the "kigou" extra flag.
 *
 * Patched by G-HAL, Sat,01 Nov,2008 / Sun,02 Nov,2008.
 */
static void
push_back_noconv_candidate(struct seg_ent *seg)
{
  struct cand_ent *cand;
  xstr *wide;
  xstr *kata = anthy_xstr_hira_to_kata( &(seg->str) );

  if (0 != anthy_xstrcmp(&(seg->str),kata)) {
    /* the reading as it is */
    cand = alloc_cand_ent();
    cand->str.len = seg->str.len;
    cand->str.str = anthy_xstr_dup_str( &(seg->str) );
    cand->flag = CEF_HIRAGANA | anthy_settings.anthy_mode.extra_flag.hiragana;
    push_back_candidate( seg, cand );

    /* followed by the converted form */
    cand = alloc_cand_ent();
    cand->str.len = kata->len;
    cand->str.str = anthy_xstr_dup_str( kata );
    cand->flag = CEF_KATAKANA | anthy_settings.anthy_mode.extra_flag.katakana;
    push_back_candidate( seg, cand );
  } else {
    /*
     * The conversion changed nothing, so separate hiragana and katakana
     * candidates are not generated.
     *
     * NOTE (taken from the original patch comment, not verifiable from
     * this file): if both were emitted for such a reading,
     * check_dupl_candidate() in anthy_sort_candidate()
     * (src-ordering/candsort.c) would merge them into a candidate whose
     * flag is (CEF_HIRAGANA | CEF_KATAKANA), i.e. one judged to be
     * hiragana and katakana at the same time.
     */
    cand = alloc_cand_ent();
    cand->str.len = seg->str.len;
    cand->str.str = anthy_xstr_dup_str( &(seg->str) );
    cand->flag = CEF_NONE | anthy_settings.anthy_mode.extra_flag.kigou;
    push_back_candidate( seg, cand );
  }
  anthy_free_xstr( kata );

  /* half-width / wide-width conversion of the reading, when there is one */
  wide = anthy_conv_half_wide(&seg->str);
  if (!wide) {
    return;
  }
  cand = alloc_cand_ent();
  cand->str.len = wide->len;
  cand->str.str = anthy_xstr_dup_str(wide);
  cand->flag = CEF_NONE | anthy_settings.anthy_mode.extra_flag.kigou;
  anthy_free_xstr(wide);
  push_back_candidate(seg, cand);
}

/*
 * Fill cand_elm entries of `ce` from the part_info array of a word_list.
 *
 * Part i of `wl` is written to ce->elm[i + index]; parts of length zero
 * are skipped and leave their slot untouched.  Each part's text is the
 * slice of the segment reading that starts at (wl->from - se->from) and
 * advances by the length of every non-empty part.  The slot of PART_CORE
 * is recorded in ce->core_elm_index, and the text of PART_DEPWORD is
 * hashed into ce->dep_word_hash.
 */
static void
make_cand_elem_from_word_list(struct seg_ent *se,
			      struct cand_ent *ce,
			      struct word_list *wl,
			      int index,
			      int is_reverse)
{
  int offset = wl->from - se->from;
  int part_no;

  for (part_no = 0; part_no < NR_PARTS; part_no++) {
    struct part_info *part = &wl->part[part_no];

    if (part->len != 0) {
      const int slot = part_no + index;
      xstr span;

      span.len = part->len;
      span.str = &se->str.str[offset];

      if (part_no == PART_CORE) {
	ce->core_elm_index = slot;
      }
      if (part_no == PART_DEPWORD) {
	ce->dep_word_hash = anthy_dep_word_hash(&span);
      }

      ce->elm[slot].se = anthy_get_seq_ent_from_xstr(&span, is_reverse);
      ce->elm[slot].str.str = span.str;
      ce->elm[slot].str.len = span.len;
      ce->elm[slot].wt = part->wt;
      ce->elm[slot].ratio = RATIO_BASE * wl->len;

      offset += part->len;
    }
  }
}


/**
 * Generate candidates from a meta word that directly carries a word_list.
 *
 * A scratch candidate with mw->nr_parts elements is built from mw->wl,
 * its flag is chosen from the status of mw's meta-word type, and
 * enum_candidates() then pushes every concrete candidate onto `se`.
 * The scratch candidate is released before returning.
 *
 * se          segment receiving the candidates
 * mw          meta word to expand (mw->wl is used)
 * top_mw      root of the meta-word tree; may be NULL.  In the default
 *             case its dep_class adds CEF_GUESS or CEF_GUESS_XCT_PART.
 * is_reverse  passed through to the dictionary lookup
 *
 * Nothing is generated when memory for the scratch candidate cannot be
 * allocated.
 *
 * Patched by G-HAL, Fri,01 May,2009, Tue,19 May,2009 (flag selection).
 */
static void
make_candidate_from_simple_metaword(struct seg_ent *se,
				    struct meta_word *mw,
				    struct meta_word *top_mw,
				    int is_reverse)
{
  struct cand_ent *ce;

  /* The scratch candidate is made of one or more words. */
  ce = alloc_cand_ent();
  if (!ce) {
    return;
  }
  ce->nr_words = mw->nr_parts;
  ce->str.str = NULL;
  ce->str.len = 0;
  ce->mw = mw;
  ce->score = 0;
  /* calloc(count, size): element count first, element size second */
  ce->elm = (struct cand_elm*) calloc(ce->nr_words, sizeof(struct cand_elm));
  if (!ce->elm && 0 < ce->nr_words) {
    /* the element array would be dereferenced below */
    anthy_release_cand_ent(ce);
    return;
  }

  /* Fill the elements from the parts of the word list. */
  make_cand_elem_from_word_list(se, ce, mw->wl, 0, is_reverse);

  switch (anthy_metaword_type_tab[mw->type].status) {
  case MW_STATUS_OCHAIRE:                       ce->flag = (CEF_OCHAIRE | CEF_OCHAIREwithINDEP | CEF_OCHAIREwithDEP); break;
  case MW_STATUS_OCHAIREwithoutDEP:             ce->flag = (CEF_OCHAIRE | CEF_OCHAIREwithINDEP); break;
  case MW_STATUS_OCHAIREwithoutINDEP:           ce->flag = (CEF_OCHAIRE | CEF_OCHAIREwithDEP); break;
  case MW_STATUS_OCHAIREwithoutINDEPwithoutDEP: ce->flag = (CEF_OCHAIRE); break;
  case MW_STATUS_CANDHISTORY:                   ce->flag = CEF_CANDHISTORY; break;
  case MW_STATUS_WRAPPED:                       ce->flag = CEF_GUESS; break;
  default:
    {
      /* CEF_BEST when this is the segment's best meta word */
      ce->flag = (se->best_mw == mw) ? CEF_BEST : CEF_NONE;
      if (top_mw) {
	if (DEP_PART_GUESS == top_mw->dep_class) {
	  ce->flag |= CEF_GUESS;
	} else if (DEP_PART == top_mw->dep_class) {
	  ce->flag |= CEF_GUESS_XCT_PART;
	}
      }
      break;
    }
  }

  enum_candidates(se, ce, 0, 0);
  anthy_release_cand_ent(ce);
}

/**
 * Generate candidates from a combined meta word, treating the meta words
 * it joins as one candidate.
 *
 * A scratch candidate with mw->nr_parts elements is filled from
 * mw->mw1->wl (slots starting at 0) and, when mw->mw2 exists, from
 * mw->mw2->mw1->wl (slots starting at NR_PARTS).  The candidate's mw is
 * set to top_mw.  The flag is CEF_GUESS when top_mw's type has status
 * MW_STATUS_WRAPPED, otherwise CEF_BEST if `mw` is the segment's best
 * meta word and CEF_NONE if not.  enum_candidates() then pushes every
 * concrete candidate onto `se`, and the scratch candidate is released.
 *
 * Nothing is generated when memory for the scratch candidate cannot be
 * allocated.
 */
static void
make_candidate_from_combined_metaword(struct seg_ent *se,
				      struct meta_word *mw,
				      struct meta_word *top_mw,
				      int is_reverse)
{
  struct cand_ent *ce;

  /* The scratch candidate is made of one or more words. */
  ce = alloc_cand_ent();
  if (!ce) {
    return;
  }
  ce->nr_words = mw->nr_parts;
  ce->score = 0;
  ce->str.str = NULL;
  ce->str.len = 0;
  ce->mw = top_mw;
  /* calloc(count, size): element count first, element size second */
  ce->elm = (struct cand_elm*) calloc(ce->nr_words, sizeof(struct cand_elm));
  if (!ce->elm && 0 < ce->nr_words) {
    /* the element array would be dereferenced below */
    anthy_release_cand_ent(ce);
    return;
  }

  /* Fill the elements from the parts of each joined word list. */
  make_cand_elem_from_word_list(se, ce, mw->mw1->wl, 0, is_reverse);
  if (mw->mw2) {
    make_cand_elem_from_word_list(se, ce, mw->mw2->mw1->wl, NR_PARTS, is_reverse);
  }

  /* A wrapped tree is flagged like a guess. */
  if (anthy_metaword_type_tab[top_mw->type].status != MW_STATUS_WRAPPED) {
    ce->flag = (se->best_mw == mw) ? CEF_BEST : CEF_NONE;
  } else {
    ce->flag = CEF_GUESS;
  }

  enum_candidates(se, ce, 0, 0);
  anthy_release_cand_ent(ce);
}


/** splitterξѤƸ
 */
static void
proc_splitter_info(struct seg_ent *se,
		   struct meta_word *mw,
		   /* topȤtreeΥȥå */
		   struct meta_word *top_mw,
		   int is_reverse)
{
  enum mw_status st;
  if (!mw) return;

  /* ޤwordlistmetawordξ */
  if (mw->wl && mw->wl->len) {
    make_candidate_from_simple_metaword(se, mw, top_mw, is_reverse);
    return;
  }

  st = anthy_metaword_type_tab[mw->type].status;
  switch (st) {
  case MW_STATUS_WRAPPED:
    /* wrap줿ΤξФ */
    proc_splitter_info(se, mw->mw1, top_mw, is_reverse);
    break;
  case MW_STATUS_COMBINED:
    make_candidate_from_combined_metaword(se, mw, top_mw, is_reverse);
    break;
  case MW_STATUS_COMPOUND:
    /* Ϣʸ */
    {
      struct cand_ent *ce;
      ce = alloc_cand_ent();
      ce->str.str = anthy_xstr_dup_str(&mw->cand_hint);
      ce->str.len = mw->cand_hint.len;
      ce->flag = CEF_COMPOUND;
      ce->mw = top_mw;
      push_back_candidate(se, ce);
    }
    break;
  case MW_STATUS_COMPOUND_PART:
    /* Ϣʸθġʸ礷ưĤʸȤƤߤ */
    /* BREAK THROUGH */
  case MW_STATUS_OCHAIRE:
  case MW_STATUS_OCHAIREwithoutDEP:		/* Patched by G-HAL, Sat,24 Jan,2009 */
  case MW_STATUS_OCHAIREwithoutINDEP:		/* Patched by G-HAL, Sun,08 Feb,2009 */
  case MW_STATUS_OCHAIREwithoutINDEPwithoutDEP:	/* Patched by G-HAL, Sun,08 Feb,2009 */
  case MW_STATUS_CANDHISTORY:			/* Patched by G-HAL, Fri,14 Nov,2008 */
    {
    /* metawordʤʸ
       ľܤ˻ꤵ줿 */
      struct cand_ent *ce;
      ce = alloc_cand_ent();
      ce->str.str = anthy_xstr_dup_str(&mw->cand_hint);
      ce->str.len = mw->cand_hint.len;
      ce->mw = top_mw;
     #if 0	/* Patched by G-HAL, Fri,14 Nov,2008, Sat,24 Jan,2009, Sun,08 Feb,2009 */
      ce->flag = (st == MW_STATUS_OCHAIRE) ? CEF_OCHAIRE : CEF_COMPOUND_PART;
     #else
      switch (st) {
      case MW_STATUS_OCHAIRE:                       ce->flag = (CEF_OCHAIRE | CEF_OCHAIREwithINDEP | CEF_OCHAIREwithDEP); break;
      case MW_STATUS_OCHAIREwithoutDEP:             ce->flag = (CEF_OCHAIRE | CEF_OCHAIREwithINDEP); break;
      case MW_STATUS_OCHAIREwithoutINDEP:           ce->flag = (CEF_OCHAIRE | CEF_OCHAIREwithDEP); break;
      case MW_STATUS_OCHAIREwithoutINDEPwithoutDEP: ce->flag = (CEF_OCHAIRE); break;
      case MW_STATUS_CANDHISTORY:                   ce->flag = CEF_CANDHISTORY; break;
      default:                                      ce->flag = CEF_COMPOUND_PART; break;
      }
     #endif

      if (mw->len < se->len) {
	/* metawordǥСƤʤΰʸդ */
	xstr xs;
	xs.str = &se->str.str[mw->len];
	xs.len = se->len - mw->len;
	anthy_xstrcat(&ce->str ,&xs);
      }
      push_back_candidate(se, ce);
    }
    break;
  case MW_STATUS_NONE:
    break;
  default:
    break;
  }
}

/**
 * Entry point of this file: build all candidates for segment `se`.
 *
 * Candidates are added in this order:
 *   1. those derived from each meta word in se->mw_array
 *   2. single-word dictionary candidates for the whole reading
 *   3. the unconverted candidates
 *   4. the guessed candidate
 * Step 3 pushes a candidate on each of its branches, so at least one
 * push is always attempted.
 *
 * When SPLITTER_DEBUG_CAND is set, each meta word is printed before it is
 * processed and "#done" is printed after the meta-word pass.
 */
void
anthy_do_make_candidates(struct splitter_context *sc,
			 struct seg_ent *se, int is_reverse)
{
  int idx = 0;

  /* 1. candidates from the meta words */
  while (idx < se->nr_metaword) {
    struct meta_word *top = se->mw_array[idx];

    if (anthy_splitter_debug_flags() & SPLITTER_DEBUG_CAND) {
      anthy_print_metaword(sc, top);
    }
    /* each array entry is the root of its own tree */
    proc_splitter_info(se, top, top, is_reverse);
    idx++;
  }
  if (anthy_splitter_debug_flags() & SPLITTER_DEBUG_CAND) {
    printf("#done\n");
  }

  /* 2. single-word candidates */
  push_back_singleword_candidate(se, is_reverse);

  /* 3. unconverted candidates */
  push_back_noconv_candidate(se);

  /* 4. guessed candidate */
  push_back_guessed_candidate(se);
}
/* vim:ts=8 sw=2 nomodified:
 */
