(**************************************************************************)
(*  Mana : A kana(romaji)-kanji conversion engine using ChaSen algorithm.    *)
(*  Copyright (C) 2003, 2004, 2005  Yamagata Yoriyuki                     *)
(*                                                                        *)
(*  This program is free software; you can redistribute it and/or modify  *)
(*  it under the terms of the GNU General Public License as published by  *)
(*  the Free Software Foundation; either version 2 of the License, or (at  *)
(*  your option) any later version.                                       *)
(*                                                                        *)
(*  This program is distributed in the hope that it will be useful, but   *)
(*  WITHOUT ANY WARRANTY; without even the implied warranty of            *)
(*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU     *)
(*  General Public License for more details.                              *)
(**************************************************************************)
	    
let dic_lookup s p len = 
  PersonalDict.lookup s p len @
  Chasen.darts_lookup s p len

let dic_lookup_prefix s p len =
  PersonalDict.lookup_prefix s p len @ 
    Chasen.darts_lookup_prefix s p len

let connect_mrph ~state ~mrph =
  let new_state, con_cost = 
    Chasen.connect_cost state mrph in
  if con_cost < 0 then raise Exit else
  let hinsi_cost =
    if Chasen.is_undef mrph = 0 then
      let hinsi = Chasen.hinsi mrph in
      Chasen.hinsi_cost hinsi
    else
      let undef_info = Chasen.undef_info mrph in
      Chasen.undef_info_cost undef_info +
	Chasen.undef_info_cost_step undef_info *
	(Chasen.keyword_len mrph / 2) in
  let mrph_cost = hinsi_cost * Chasen.weight mrph * Chasen.mrph_cost_weight in
  let cost = mrph_cost + con_cost in
  (new_state, cost)

(********************)
(* ܥդ *)
(********************)

module Vertex = struct

  (* ĺ̤ˤãɤ2Ĥʬव롣*)
  type t = {pos : int; state : int; undef : bool}

  (*  *)
  let init ={pos = 0; state = 0; undef = false}
  (* üpos,state=-1,undef=falseɽȤˤ*)
  let terminal = {pos = -1; state = -1 ; undef =false}

  let compare v1 v2 =
    let sgn = v1.pos - v2.pos in
    if sgn <> 0 then sgn else
    let sgn = v1.state - v2.state in
    if sgn <> 0 then sgn else
    if v1.undef then
      if v2.undef then 0 else 1
    else
      if v2.undef then -1 else 0

  let equal v1 v2 = (compare v1 v2 = 0)

      (* äȤϥåȤ٤ *)
      (* FIXME! ʬϤʤ *)
  let hash v =
    if v.undef then
      v.pos lxor v.state
    else
      lnot (v.pos lxor v.state)

end

module EdgeLabel = struct 

  type t = {mrph : Chasen.mrph; cost : int}

  let default = {mrph = Chasen.null_mrph; cost = 0}

  let compare v1 v2 =
    let sgn = v1.cost - v2.cost in
    if sgn <> 0 then sgn else
    Chasen.compare_mrph v1.mrph v2.mrph

  let equal v1 v2 = (compare v1 v2 = 0)
	
end

(* ܥ *)
module TransGraph = Graph.Imperative.Digraph.ConcreteLabeled(Vertex)(EdgeLabel)

type parse_obj = TransGraph.t

let create_vertex p s undef =
    {Vertex.pos = p; Vertex.state = s; Vertex.undef = undef}

let parse string ~state ~pos ~len =
  (* ĺ֤ȤʬषƳǼȤ *)
  let vertex_array = Array.make (len+1) [] in
  (* դ֤ɲä *)
  let graph = TransGraph.create () in 
  TransGraph.add_vertex graph Vertex.init;
  vertex_array.(0) <- [Vertex.init];
  
  (* posˤΰ֤ǻϤޤǤդä *)
  for i = 0 to len - 1 do
    (* ǤΥꥹ *)
    let mrph_list = 
      dic_lookup_prefix string (pos+i) (len - i) in
    (* ǽ̤θ *)
    let undef_list =
      (* ѥեޥ󥹾ͳˤꡢŪʸΤ̤θǤ
      ǽΤߤͤ롣̤θʬΥѥեޥ󥹤ľ *)
      if i <> 0 then [] else
      let r = ref [] in
      for j = len - i  to len - i do
	let u_list = 
	  Chasen.undefwords (EucString.sub string (pos + i) j) in
	r := List.rev_append u_list !r;
      done;
      !r in

    (* ĺvĹmlenηmrphɲä롣*)
    let add v undef (mrph, mlen) =
      let s', cost = 
	connect_mrph ~state:v.Vertex.state ~mrph in
      let label = {EdgeLabel.mrph = mrph; EdgeLabel.cost = cost} in
      let v' = create_vertex (pos+i+mlen) s' undef in
      let e = TransGraph.E.create v label v' in

      if not (TransGraph.mem_vertex graph v') then
	vertex_array.(i+mlen) <- v' :: vertex_array.(i+mlen);
      TransGraph.add_edge_e graph e in

    let add_undef v undef = add v true (undef, len - i) in

    (* ĺv˷Ǥ³ *)
    let expand v =
      List.iter (add v false) mrph_list;
      (* ̤ɤϢܤʤ *)
      if v.Vertex.undef then () else
      List.iter (add_undef v) undef_list in

    (* ߤΰ֤ηǤŸ *)
    List.iter expand vertex_array.(i);
  done;
  
  (* üĤ *)
  TransGraph.add_vertex graph Vertex.terminal;
  let add_termlink v =
    let e = TransGraph.E.create v EdgeLabel.default Vertex.terminal in
    TransGraph.add_edge_e graph e in
  List.iter add_termlink vertex_array.(len);
  (* ֤Τϥդ *)
  graph

(**************)
(* Ŭõ *)  
(**************)

module Cost = struct
  type label = EdgeLabel.t
  type t = int

  let weight label = label.EdgeLabel.cost
  let zero = 0
  let add = (+)
  let compare = (-)
end

module PathFinder = Graph.Path.Dijkstra(TransGraph)(Cost)

let best_path graph = 
  let path, cost = 
    PathFinder.shortest_path graph Vertex.init Vertex.terminal in

  (* PathFinder֤ѥ򥹥󤷤ơѤΥꥹȤ *)
  let rec f p cost path' = function
      [] -> path'
    | edge :: r ->
	let {EdgeLabel.mrph = mrph; EdgeLabel.cost = e_cost} = 
	  TransGraph.E.label edge in
	let {Vertex.pos = p'; Vertex.state = s} = TransGraph.E.dst edge in
	let cost = e_cost + cost in
	f p' cost ((mrph, p, p' - p, s, cost) :: path') r in
  
  (*  *)
  match f 0 0 [] path with
    (mrph, pos, len, s, cost) :: r when 
      Chasen.compare_mrph mrph Chasen.null_mrph = 0 ->
	List.rev r
  | _ -> assert false
	
(************)
(* Ѵ *)
(************)

let list_candidates string ~state ~pos ~mrph_len ~len =
  let mrph_list = dic_lookup string pos mrph_len in
  let mrph_list = 
    Chasen.undefwords (EucString.sub string pos mrph_len) @ mrph_list in
  let f mrph =
    let state', cost = connect_mrph ~state ~mrph in
    (mrph, state', cost) in
  let list = List.map f mrph_list in
  let cmp (_, _, c1) (_, _, c2) = c1 - c2 in
  List.sort cmp list
