(**************************************************************************)
(*  Mana : A kana(romaji)-kanji conversion engine using ChaSen algorithm.    *)
(*  Copyright (C) 2003, 2004, 2005  Yamagata Yoriyuki                     *)
(*                                                                        *)
(*  This program is free software; you can redistribute it and/or modify  *)
(*  it under the terms of the GNU General Public License as published by  *)
(*  the Free Software Foundation; either version 2 of the License, or (at  *)
(*  your option) any later version.                                       *)
(*                                                                        *)
(*  This program is distributed in the hope that it will be useful, but   *)
(*  WITHOUT ANY WARRANTY; without even the implied warranty of            *)
(*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU     *)
(*  General Public License for more details.                              *)
(**************************************************************************)

(********************)
(* ܥդ *)
(********************)

open Trans

(* ܥ *)
type parse_obj = {graph : Graph.t;
		  start_pos : int; start_state : int;
		  string : string ;
		  forward_cost_markers : (int * Edge.t option) Mark.t Mark.t}

let parse string ~state ~pos ~len =
  (* դ֤ɲä *)
  let graph = Graph.create (pos+len+1) in
  let init = Vertex.create ~pos ~state in
  Graph.add_vertex graph init;
  
  (* posˤΰ֤ǻϤޤǤդä *)
  for i = 0 to len - 1 do
    (* ǤΥꥹ *)
    let mrph_list = 
      Mrph.lookup_prefix string (pos+i) (len - i) in
    (* ǽ̤θ *)
    let undef_list =
      (* ̤θʬΥѥեޥ󥹤ľ *)
      let r = ref [] in
      for j = 1  to len - i do
	let u_list = 
	  Mrph.undefwords (EucString.sub string (pos + i) j) in
	r := (List.map (fun m -> (m, j)) u_list) @ !r;
      done;
      !r in

    (* ĺvĹmlenηmrphɲä롣*)
    let add v (mrph, mlen) =
      let s', cost = 
	Mrph.connect_mrph ~state:v.Vertex.state ~mrph in
      if cost < 0 then () else
      let label = {EdgeLabel.mrph = mrph; EdgeLabel.cost = cost; EdgeLabel.pos = v.Vertex.pos} in
      let v' = Vertex.create (pos+i+mlen) s' in
      let e = Edge.create v label v' in
      
      Graph.add_edge graph e
    in


    (* ĺv˷Ǥ³ *)
    let expand v =
      List.iter (add v) mrph_list;
      List.iter (add v) undef_list in

    (* ߤΰ֤ηǤŸ *)
    Graph.iter_pos expand graph (pos+i)
  done;

  {graph=graph; string=string;
   start_pos=pos; start_state=state;
   forward_cost_markers=Mark.create ()}   

(**************)
(* Ŭõ *)  
(**************)

module Cost = struct
  type label = EdgeLabel.t
  type t = int

  let weight label = label.EdgeLabel.cost
  let zero = 0
  let add = (+)
  let compare = (-)
end

module PathFinder = Viterbi(Cost)

(* PathFinder֤ѥ򥹥󤷤ơѤΥꥹȤ *)
let rec convert_path = function
    [] -> []
  | (edge, cost) :: rest ->
      let mrph = (Edge.label edge).EdgeLabel.mrph in
      let p = (Edge.src edge).Vertex.pos in
      let {Vertex.pos = p'; Vertex.state = s} = Edge.dst edge in
      (mrph, p, p'-p, s, cost) :: convert_path rest

let get_forward_cost parse_obj fv =
    match Mark.lookup parse_obj.forward_cost_markers fv with
      Some m  -> m
    | None ->
	let m = PathFinder.forward_cost parse_obj.graph fv in
	Mark.mark parse_obj.forward_cost_markers fv m;
	m

let best_path_from_parse_obj parse_obj ~state ~pos  =
  let last_pos = Graph.length parse_obj.graph - 1 in
  let fv = {Vertex.state = state; Vertex.pos = pos} in
  let forward_cost = get_forward_cost parse_obj fv in
  let path = 
    PathFinder.best_path (parse_obj.graph, forward_cost) fv last_pos in
  convert_path path
	
(************)
(* Ѵ *)
(************)

let list_candidates string ~state ~pos ~mrph_len ~len =
  let mrph_list = Mrph.lookup string pos mrph_len in
  let mrph_list = 
    Mrph.undefwords (EucString.sub string pos mrph_len) @ mrph_list in
  let f mrph =
    let state', cost = Mrph.connect_mrph ~state ~mrph in
    (mrph, state', cost) in
  let list = List.map f mrph_list in
  let cmp (_, _, c1) (_, _, c2) = c1 - c2 in
  List.sort cmp list

let list_candidates_from_parse_obj parse_obj ~state ~pos ~mrph_len =
  let graph = parse_obj.graph in
  let v = {Vertex.state = state; Vertex.pos = pos} in
  let edge_list = Graph.succ_e graph v in

  let filter e = 
    let label = Edge.label e in
    EucString.length (Mrph.keyword (label.EdgeLabel.mrph)) = mrph_len in

  let compare e1 e2 = 
    (Edge.label e1).EdgeLabel.cost - (Edge.label e2).EdgeLabel.cost in

  let convert e  = 
    let label = Edge.label e in
    let dst = Edge.dst e in
    (label.EdgeLabel.mrph, dst.Vertex.state, dst.Vertex.pos) in

  (*ͤηѴ *)
  List.map convert 
    (* ȤǥȡǤդΥȤѤƥȤƤ롣 *)
    (List.sort compare
       (* keyword_lenmrph_len˰פΤӤ*)
       (List.filter filter edge_list))

(********)
(* ؽ *)
(********)

(*  *)
let rec list_last = function
    [] -> raise Not_found
  | [x] -> x
  | _ :: rest -> list_last rest

let learn parse_obj ~state ~pos path =

  if path = [] then () else

  let graph = parse_obj.graph in
  let last_pos = 
    let _, p, len = list_last path in
    p+len in

  (* pathˤǰʳϥ̵Ȥ롣*)
  let module Cost = struct
    type label = EdgeLabel.t
    type t = Int of int | Inf

    let weight label = 
      let pos = label.EdgeLabel.pos in
      let s_form = Mrph.surface_form label.EdgeLabel.mrph in
      let length = EucString.length (Mrph.keyword (label.EdgeLabel.mrph)) in
      let match_fun (s, p, len) =
	p = pos && len = length && s = s_form in
      if List.exists match_fun path then
	Int label.EdgeLabel.cost
      else
	Inf

    let zero = Int 0

    let add c1 c2 = 
      match c1, c2 with
	Int c1, Int c2 -> Int (c1 + c2)
      | Inf, _ | _, Inf  -> Inf

    let compare c1 c2 =
      match c1, c2 with
	Int c1, Int c2 -> c1 - c2
      | Inf, Inf -> 0
      | Inf, _ -> 1
      | _, Inf -> -1
  end in
  
  let module Learner = Viterbi(Cost) in

  let fv = {Vertex.state = state; Vertex.pos = pos} in
  let forward_cost = Learner.forward_cost graph fv in

  let path  =
    try 
      Learner.best_path (graph, forward_cost) fv last_pos
    with Not_found -> [] in
  let f (e, _) = 
    let mrph = (Edge.label e).EdgeLabel.mrph in
(*    Printf.eprintf "%s\n" (Mrph.surface_form mrph); flush stderr;*)
    Mrph.incr_count mrph in
  List.iter f path
