(**************************************************************************)
(*  Mana : A kana(romaji)-kanji conversion engine using ChaSen algorithm.    *)
(*  Copyright (C) 2003, 2004  Yamagata Yoriyuki                           *)
(*                                                                        *)
(*  This program is free software; you can redistribute it and/or modify  *)
(*  it under the terms of the GNU General Public License as published by  *)
(*  the Free Software Foundation; either version 2 of the License, or (at  *)
(*  your option) any later version.                                       *)
(*                                                                        *)
(*  This program is distributed in the hope that it will be useful, but   *)
(*  WITHOUT ANY WARRANTY; without even the implied warranty of            *)
(*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU     *)
(*  General Public License for more details.                              *)
(**************************************************************************)

let dic_lookup s p len = 
  PersonalDict.lookup s p len @
  Chasen.darts_lookup s p len

let dic_lookup_prefix s p len =
  PersonalDict.lookup_prefix s p len @ 
  Chasen.darts_lookup_prefix s p len
 
let connect_mrph ~state ~mrph =
  let new_state, con_cost = 
    Chasen.connect_cost state mrph in
  if con_cost < 0 then raise Exit else
  let hinsi_cost =
    if Chasen.is_undef mrph = 0 then
      let hinsi = Chasen.hinsi mrph in
      Chasen.hinsi_cost hinsi
    else
      let undef_info = Chasen.undef_info mrph in
      Chasen.undef_info_cost undef_info +
	Chasen.undef_info_cost_step undef_info *
	(Chasen.keyword_len mrph / 2) in
  let mrph_cost = hinsi_cost * Chasen.weight mrph * Chasen.mrph_cost_weight in
  let cost = mrph_cost + con_cost in
  (new_state, cost)

module Int = struct type t = int let compare = (-) end
module IMap = Map.Make(Int)

type vertex = {vertex_cost : int;		
               (*vertex̤ܤΥ*)
	       from_pos : int;
	       from_state : int;
	       to_pos : int;
	       to_state : int;
	       mrph : Chasen.mrph}

let print_vertex vertex =
 Printf.printf 
    "{vertex_cost = %d; from_pos = %d; from_state = %d; to_pos = %d; to_state = %d}"
    vertex.vertex_cost vertex.from_pos vertex.from_state vertex.to_pos
    vertex.to_state

type node = {pos : int;
	     state : int; 
	     (*ǽΥΡɤ餳ޤκǾΥ*)
	     path_cost : int;		
	     (* path_cost -> vertices *)
             in_vertex_map : vertex list IMap.t;
	     (* ΥΡɤǤvertex *)
	     out_vertex_list : vertex list;}

let print_node node =
  Printf.printf 
    "{pos = %d; state = %d; path_cost = %d; in_vertex_map = ["
    node.pos node.state node.path_cost;
  IMap.iter (fun _ vertex_list ->
    List.iter print_vertex vertex_list)
    node.in_vertex_map;
  Printf.printf "]; out_vertex_list = [";
  List.iter print_vertex node.out_vertex_list;
  Printf.printf "]"
    

module IntPair = struct 
  type t = int * int 
  let compare (p1, s1) (p2, s2) =
    let sgn = p1 - p2 in
    if sgn <> 0 then sgn else
    s1 - s2
end

type parse_obj = (int, node) Hashtbl.t array

(* [node_set]ΰ[pos],[state]ΥΡɤõ̵пΡ
   ֤*)
let find_node ~pos ~state node_set = 
  try Hashtbl.find node_set.(pos) state with Not_found -> 
    {pos = pos;
     state = state;
     path_cost = 0x3fffffff; 
     out_vertex_list = [];
     in_vertex_map = IMap.empty}
   
let add_vertex ~pos ~to_pos ~state mrph node_set =
  match (try Some (connect_mrph state mrph) with Exit -> None) with
    None -> ()
  | Some (new_state, cost) ->
      let vertex =
	{vertex_cost = cost;
	 from_pos = pos;
	 from_state = state;
	 to_pos = to_pos;
	 to_state = new_state;
	 mrph = mrph} in
      assert(Hashtbl.mem node_set.(pos) state);
      let from_node = find_node ~pos ~state node_set in
      let to_node = find_node ~pos:to_pos ~state:new_state node_set in
      let path_cost = from_node.path_cost + vertex.vertex_cost in
      let vertex_map =
	let vlist = 
	  try IMap.find path_cost to_node.in_vertex_map with Not_found -> [] in
	IMap.add path_cost (vertex :: vlist) to_node.in_vertex_map in
      let path_cost = min to_node.path_cost path_cost in
      let to_node =
	{to_node with path_cost = path_cost; in_vertex_map = vertex_map} in
      let from_node =
	{from_node with out_vertex_list = 
	 vertex :: from_node.out_vertex_list} in
      Hashtbl.remove node_set.(pos) state;
      Hashtbl.add node_set.(pos) state from_node;
      Hashtbl.remove node_set.(to_pos) new_state;
      Hashtbl.add node_set.(to_pos) new_state to_node

let list_next_mrph string ~pos ~len =
  dic_lookup_prefix string pos len

let parse string ~state ~pos ~len =
  if len <= 0 then Array.init 0 Hashtbl.create else
  let init_node = 
    {pos = pos; state = state; path_cost = 0; 
     out_vertex_list = [];
     in_vertex_map = IMap.empty} in
  let node_set = Array.init (pos + len + 1) Hashtbl.create in
  Hashtbl.add node_set.(pos) state init_node;
  let no_conversion_list = 
    Chasen.undefwords (EucString.sub string pos len) in
  List.iter (fun mrph ->
    add_vertex ~pos ~to_pos:(pos + len) ~state mrph node_set)
    no_conversion_list;
  for p = pos to pos + len - 1 do
    let mrph_list = list_next_mrph string ~pos:p ~len:(len + pos - p) in
    Hashtbl.iter (fun state _ ->
      List.iter (fun (mrph, ln) ->
	add_vertex ~pos:p ~to_pos:(p + ln) ~state mrph node_set)
	mrph_list)
      node_set.(p);
  done;
  node_set

let best_path node_set pos =
  if Array.length node_set = 0 then [] else
  (*Ǹΰ֤ˤΡɤǺŬΤ*)
  let last_node =	
    let last_slice = node_set.(pos) in
    let f _ n2 a =
      match a with
	None -> Some n2
      | Some n1 ->
	  if n1.path_cost >= n2.path_cost then Some n2 else a in
    match Hashtbl.fold f last_slice None with
      Some last_node -> last_node
    | None -> raise Not_found in
  (* [node]˻Ŭpath֤pathϥ졼 *)
  let rec gather node path =
    try (match IMap.min_elt node.in_vertex_map with
      _, vertex :: _ ->
	let pos' = vertex.from_pos in
	let state' = vertex.from_state in
	let n' = Hashtbl.find node_set.(pos') state' in
	  let path' = 
	    (vertex.mrph, pos', vertex.to_pos - pos',
	     node.state, node.path_cost) :: path in
	  gather n' path'
    | _ -> assert false)
    with Not_found -> path in
  gather last_node []
  
let list_candidates string ~state ~pos ~mrph_len ~len =
  let mrph_list = dic_lookup string pos mrph_len in
  let mrph_list = 
    Chasen.undefwords (EucString.sub string pos mrph_len) @ mrph_list in
  let f mrph =
    let state', cost = connect_mrph ~state ~mrph in
    (mrph, state', cost) in
  let list = List.map f mrph_list in
  let cmp (_, _, c1) (_, _, c2) = c1 - c2 in
  List.sort cmp list
