(**************************************************************************)
(*  Mana : A kana(romaji)-kanji conversion engine using ChaSen algorithm.    *)
(*  Copyright (C) 2003, 2004, 2005  Yamagata Yoriyuki                     *)
(*                                                                        *)
(*  This program is free software; you can redistribute it and/or modify  *)
(*  it under the terms of the GNU General Public License as published by  *)
(*  the Free Software Foundation; either version 2 of the License, or (at  *)
(*  your option) any later version.                                       *)
(*                                                                        *)
(*  This program is distributed in the hope that it will be useful, but   *)
(*  WITHOUT ANY WARRANTY; without even the implied warranty of            *)
(*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU     *)
(*  General Public License for more details.                              *)
(**************************************************************************)

(********************)
(* ܥդ *)
(********************)

module Vertex = struct

  type t = {pos : int; state : int}

  (* üpos,state=-1ɽȤˤ*)
  let terminal = {pos = -1; state = -1}

  let compare v1 v2 =
    let sgn = v1.pos - v2.pos in
    if sgn <> 0 then sgn else
    let sgn = v1.state - v2.state in
    sgn

  let equal v1 v2 = (v1.pos = v2.pos) && (v1.state = v2.state)

      (* äȤϥåȤ٤ *)
  let hash v = v.pos lxor v.state

end

module EdgeLabel = struct 

  (* cost = ǼΤΥ + ξ֤ؤϢܥ *)
  type t = {mrph : Mrph.mrph; pos : int; cost : int}

  let make_terminal v =
    {mrph = Mrph.null_mrph; cost = 0; pos = v.Vertex.pos}

  let default = {mrph = Mrph.null_mrph; cost = 0; pos = -1}

  let compare v1 v2 =
    let sgn = v1.pos - v2.pos in
    if sgn <> 0 then sgn else
    let sgn = v1.cost - v2.cost in
    if sgn <> 0 then sgn else
    (* IDEA:mrphIDդ롩 *)
    Mrph.compare_mrph v1.mrph v2.mrph

  let equal v1 v2 = (compare v1 v2 = 0)
	
end

(* ܥ *)
module TransGraph = Graph.Imperative.Digraph.ConcreteLabeled(Vertex)(EdgeLabel)

type parse_obj = TransGraph.t

let create_vertex p s =
    {Vertex.pos = p; Vertex.state = s}

let parse string ~state ~pos ~len =
  (* ĺ֤ȤʬषƳǼȤ *)
  let vertex_array = Array.make (len+1) [] in
  (* դ֤ɲä *)
  let graph = TransGraph.create () in
  let init = {Vertex.pos = pos; Vertex.state = state} in
  TransGraph.add_vertex graph init;
  vertex_array.(0) <- [init];
  
  (* posˤΰ֤ǻϤޤǤդä *)
  for i = 0 to len - 1 do
    (* ǤΥꥹ *)
    let mrph_list = 
      Mrph.lookup_prefix string (pos+i) (len - i) in
    (* ǽ̤θ *)
    let undef_list =
      (* ̤θʬΥѥեޥ󥹤ľ *)
      let r = ref [] in
      for j = 1  to len - i do
	let u_list = 
	  Mrph.undefwords (EucString.sub string (pos + i) j) in
	r := List.rev_append u_list !r;
      done;
      !r in

    (* ĺvĹmlenηmrphɲä롣*)
    let add v (mrph, mlen) =
      try
	let s', cost = 
	  Mrph.connect_mrph ~state:v.Vertex.state ~mrph in
	let label = {EdgeLabel.mrph = mrph; EdgeLabel.cost = cost; EdgeLabel.pos = v.Vertex.pos} in
	let v' = create_vertex (pos+i+mlen) s' in
	let e = TransGraph.E.create v label v' in
	
	if not (TransGraph.mem_vertex graph v') then
	  vertex_array.(i+mlen) <- v' :: vertex_array.(i+mlen);
	TransGraph.add_edge_e graph e

      with Exit -> () in

    let add_undef v undef = add v (undef, len - i) in

    (* ĺv˷Ǥ³ *)
    let expand v =
      List.iter (add v) mrph_list;
      List.iter (add_undef v) undef_list in

    (* ߤΰ֤ηǤŸ *)
    List.iter expand vertex_array.(i);
  done;
  
  (* üĤ *)
  TransGraph.add_vertex graph Vertex.terminal;
  let add_termlink v =
    let e = TransGraph.E.create v (EdgeLabel.make_terminal v)
	Vertex.terminal in
    TransGraph.add_edge_e graph e in
  List.iter add_termlink vertex_array.(len);
  (* ֤Τϥդ *)
  graph

(**************)
(* Ŭõ *)  
(**************)

module Cost = struct
  type label = EdgeLabel.t
  type t = int

  let weight label = label.EdgeLabel.cost
  let zero = 0
  let add = (+)
  let compare = (-)
end

module PathFinder = Graph.Path.Dijkstra(TransGraph)(Cost)

(* PathFinder֤ѥ򥹥󤷤ơѤΥꥹȤ *)
let convert_path pos path =
  let rec f p cost path' = function
      [_] -> path'
    | edge :: r ->
	let {EdgeLabel.mrph = mrph; EdgeLabel.cost = e_cost} = 
	  TransGraph.E.label edge in
	let {Vertex.pos = p'; Vertex.state = s} = TransGraph.E.dst edge in
	let cost = e_cost + cost in
	f p' cost ((mrph, p, p' - p, s, cost) :: path') r 
    | [] -> assert false in
  List.rev (f pos 0 [] path)  

let best_path_from_parse_obj graph ~state ~pos =
    let fv = {Vertex.state = state; Vertex.pos = pos} in
    let path, cost = PathFinder.shortest_path graph fv Vertex.terminal in
    convert_path pos path
	
(************)
(* Ѵ *)
(************)

let list_candidates string ~state ~pos ~mrph_len ~len =
  let mrph_list = Mrph.lookup string pos mrph_len in
  let mrph_list = 
    Mrph.undefwords (EucString.sub string pos mrph_len) @ mrph_list in
  let f mrph =
    let state', cost = Mrph.connect_mrph ~state ~mrph in
    (mrph, state', cost) in
  let list = List.map f mrph_list in
  let cmp (_, _, c1) (_, _, c2) = c1 - c2 in
  List.sort cmp list

let list_candidates_from_parse_obj graph ~state ~pos ~mrph_len =
  let v = {Vertex.state = state; Vertex.pos = pos} in
  let edge_list = TransGraph.succ_e graph v in

  let filter (_, label, _) = 
    EucString.length (Mrph.keyword (label.EdgeLabel.mrph)) = mrph_len in

  let compare (_, label1, _) (_, label2, _) = 
    label1.EdgeLabel.cost - label2.EdgeLabel.cost in

  let convert (_, label, v2) = 
    (label.EdgeLabel.mrph, v2.Vertex.state, v2.Vertex.pos) in

  (*ͤηѴ *)
  List.map convert 
    (* ȤǥȡǤդΥȤѤƥȤƤ롣 *)
    (List.sort compare
       (* keyword_lenmrph_len˰פΤӤ*)
       (List.filter filter edge_list))

(********)
(* ؽ *)
(********)

(* Ǥ *)
let remove_last list =
  match List.rev list with
    [] -> []
  | _ :: list -> List.rev list

let learn graph ~state ~pos path =

  (* pathˤǰʳϥ̵Ȥ롣*)
  let module Cost = struct
    type label = EdgeLabel.t
    type t = Int of int | Inf

    let weight label = 
      let is_terminal = 
	Mrph.compare_mrph label.EdgeLabel.mrph Mrph.null_mrph = 0 in
      if is_terminal then Int 0 else
      let pos = label.EdgeLabel.pos in
      let s_form = Mrph.surface_form label.EdgeLabel.mrph in
      let length = EucString.length (Mrph.keyword (label.EdgeLabel.mrph)) in
      let match_fun (s, p, len) =
	p = pos && len = length && s = s_form in
      if List.exists match_fun path then
	Int label.EdgeLabel.cost
      else
	Inf

    let zero = Int 0

    let add c1 c2 = 
      match c1, c2 with
	Int c1, Int c2 -> Int (c1 + c2)
      | Inf, _ | _, Inf  -> Inf

    let compare c1 c2 =
      match c1, c2 with
	Int c1, Int c2 -> c1 - c2
      | Inf, Inf -> 0
      | Inf, _ -> 1
      | _, Inf -> -1
  end in
  
  let module PathFinder = Graph.Path.Dijkstra(TransGraph)(Cost) in

  let fv = {Vertex.state = state; Vertex.pos = pos} in

  let path, _ =
    try 
      PathFinder.shortest_path graph fv Vertex.terminal 
    with Not_found -> [], Cost.Inf in
  let f e = 
    let mrph = (TransGraph.E.label e).EdgeLabel.mrph in
(*    Printf.eprintf "%s\n" (Mrph.surface_form mrph); flush stderr;*)
    Mrph.incr_count mrph in
  List.iter f (remove_last path)
