(**************************************************************************)
(*  Mana : A kana(romaji)-kanji conversion engine using ChaSen algorithm.    *)
(*  Copyright (C) 2003, 2004, 2005  Yamagata Yoriyuki                     *)
(*                                                                        *)
(*  This program is free software; you can redistribute it and/or modify  *)
(*  it under the terms of the GNU General Public License as published by  *)
(*  the Free Software Foundation; either version 2 of the License, or (at  *)
(*  your option) any later version.                                       *)
(*                                                                        *)
(*  This program is distributed in the hope that it will be useful, but   *)
(*  WITHOUT ANY WARRANTY; without even the implied warranty of            *)
(*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU     *)
(*  General Public License for more details.                              *)
(**************************************************************************)

(* $Id: personalDict.mli 158 2005-11-21 18:16:30Z yori $ *)

(* A morpheme value; this is exactly [Chasen.mrph], re-exported under a
   local name. *)
type mrph = Chasen.mrph
(* The state threaded through [connect_mrph]: it is handed to
   [Chasen.connect_cost], which returns the next state.  [connect_mrph]
   returns [-1] in this position when the connection cost is negative.
   NOTE(review): presumably an index into ChaSen's connection automaton;
   confirm against the Chasen bindings. *)
type state = int

(* Re-export of [Chasen.null_mrph].
   NOTE(review): presumably a placeholder "no morpheme" value; confirm. *)
let null_mrph = Chasen.null_mrph
(* Re-export of [Chasen.compare_mrph].
   NOTE(review): presumably a comparison function on [mrph]; confirm. *)
let compare_mrph = Chasen.compare_mrph

(* Re-export of [Chasen.keyword]. *)
let keyword = Chasen.keyword

(* [surface_form mrph] converts [mrph] with [Chasen.mrph_data_of_mrph] and
   returns the result of [Chasen.surface_form] on that data. *)
let surface_form mrph =
  Chasen.surface_form (Chasen.mrph_data_of_mrph mrph)

(* [connect_mrph ~state ~mrph] returns [(new_state, cost)] for following
   [state] with [mrph].

   [Chasen.connect_cost state mrph] supplies the next state and the
   connection cost.  When that connection cost is negative the result is
   [(-1, 0)] (NOTE(review): presumably "connection not allowed"; confirm).

   Otherwise the morpheme's base cost is
   - [Chasen.hinsi_cost (Chasen.hinsi mrph)] when [Chasen.is_undef mrph]
     is [0], or
   - [undef_info_cost + undef_info_cost_step * (keyword_len / 2)] taken
     from [Chasen.undef_info mrph] when it is non-zero,
   and the returned cost is
   [base * Chasen.weight mrph * Chasen.mrph_cost_weight + connection cost]. *)
let connect_mrph ~state ~mrph =
  let next_state, link_cost = Chasen.connect_cost state mrph in
  if link_cost >= 0 then begin
    let base_cost =
      match Chasen.is_undef mrph with
      | 0 -> Chasen.hinsi_cost (Chasen.hinsi mrph)
      | _ ->
          let info = Chasen.undef_info mrph in
          Chasen.undef_info_cost info +
            Chasen.undef_info_cost_step info *
            (Chasen.keyword_len mrph / 2)
    in
    let word_cost =
      base_cost * Chasen.weight mrph * Chasen.mrph_cost_weight in
    (next_state, word_cost + link_cost)
  end else
    (~-1, 0)

(* [adjust_weight ~current] builds the in-place weight update shared by
   [lookup] and [lookup_prefix].

   The returned function takes a morpheme [m] and the count [c] attached
   to its entry, and replaces the weight of [m] with

     [Chasen.weight m - (current - c) * (Config.weight_scale
                                         / Config.total_count_ipa)]

   clamped to the range [0 .. 4000] and truncated to an integer by
   [int_of_float].  The new weight is stored with [Chasen.set_weight]. *)
let adjust_weight ~current =
  let max_weight = 4000 in
  (* Does not depend on the entry, so compute it once per lookup instead
     of once per entry. *)
  let step = Config.weight_scale /. Config.total_count_ipa in
  fun m c ->
    let w = float_of_int (Chasen.weight m) in
    let w = w -. float_of_int (current - c) *. step in
    let w =
      if w < 0.0 then 0
      else if w > float_of_int max_weight then max_weight
      else int_of_float w in
    Chasen.set_weight m w

(* [lookup s p len] returns the morphemes found by [PersonalDict.lookup]
   followed by those found by [Chasen.darts_lookup], all called with the
   same [s p len].  Entries coming from [Chasen.darts_lookup] are given
   the count [0].  Every returned morpheme has had its weight rewritten in
   place by [adjust_weight], using [PersonalDict.current_count ()] sampled
   before the dictionaries are queried.
   NOTE(review): [s], [p], [len] are presumably a string, a start position
   and a length; confirm against the PersonalDict/Chasen signatures. *)
let lookup s p len =
  let current = PersonalDict.current_count () in
  let entries =
    PersonalDict.lookup s p len @
    List.map (fun m -> (m, 0)) (Chasen.darts_lookup s p len) in
  let adjust = adjust_weight ~current in
  List.map (fun (m, c) -> adjust m c; m) entries

(* [lookup_prefix s p len] is the prefix variant of [lookup]: it combines
   [PersonalDict.lookup_prefix] with [Chasen.darts_lookup_prefix] (whose
   entries are given the count [0]) and returns [(morpheme, length)] pairs,
   where [length] is the per-entry value reported by the dictionary.  The
   weights are rewritten in place exactly as in [lookup]. *)
let lookup_prefix s p len =
  let current = PersonalDict.current_count () in
  let entries =
    PersonalDict.lookup_prefix s p len @
    List.map
      (fun (m, matched) -> (m, 0, matched))
      (Chasen.darts_lookup_prefix s p len) in
  let adjust = adjust_weight ~current in
  List.map (fun (m, c, matched) -> adjust m c; (m, matched)) entries

(* Re-export of [PersonalDict.add].  [add_new_word] calls it once for each
   element returned by [Chasen.new_word]. *)
let add = PersonalDict.add
(* Re-export of [PersonalDict.remove]. *)
let remove = PersonalDict.remove

(* Re-export of [PersonalDict.incr_count].
   NOTE(review): presumably advances the counter read by
   [PersonalDict.current_count]; confirm in PersonalDict. *)
let incr_count = PersonalDict.incr_count
    
(* [add_new_word ~kaki ~yomi] asks [Chasen.new_word] for the entries
   generated from [kaki] and [yomi], then passes each of them, in list
   order, to [add].
   NOTE(review): [kaki] / [yomi] are presumably the written form and the
   reading of the word; confirm against [Chasen.new_word]. *)
let add_new_word ~kaki ~yomi =
  let generated = Chasen.new_word ~kaki ~yomi in
  List.iter add generated

(* Re-export of [Chasen.undefwords].
   NOTE(review): presumably related to the undefined-word handling used in
   [connect_mrph]; confirm against the Chasen bindings. *)
let undefwords = Chasen.undefwords

