(**************************************************************************)
(*  Mana : A kana(romaji)-kanji conversion engine using ChaSen algorithm.    *)
(*  Copyright (C) 2003, 2004, 2005  Yamagata Yoriyuki                     *)
(*                                                                        *)
(*  This program is free software; you can redistribute it and/or modify  *)
(*  it under the terms of the GNU General Public License as published by  *)
(*  the Free Software Foundation; either version 2 of the License, or (at  *)
(*  your option) any later version.                                       *)
(*                                                                        *)
(*  This program is distributed in the hope that it will be useful, but   *)
(*  WITHOUT ANY WARRANTY; without even the implied warranty of            *)
(*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU     *)
(*  General Public License for more details.                              *)
(**************************************************************************)

(* $Id: personalDict.ml 184 2006-06-11 17:20:44Z yori $ *)

let db = 
  let flags = [Gdbm.Dbm_create; Gdbm.Dbm_rdwr] in
    Gdbm.opendbm Config.personal_dict_file flags 0o600

let () =
  at_exit (fun () -> Gdbm.close db)

let critical f =
  Gdbm.lock_db db;
  let e = try f (); None with e -> Some e in
  Gdbm.unlock_db db;
  match e with None -> () | Some e -> raise e

let get_entry_list key = 
  match try Some (Gdbm.find db key) with Not_found -> None with
    None -> []
  | Some string ->
      let scanbuf = Scanf.Scanning.from_string string in
      try
	let e = Scanf.bscanf scanbuf "2,[%S,%d;" (fun s d -> (s, d)) in
	let rec scan_loop () =
	  try Scanf.bscanf scanbuf "%S,%d;" 
	      (fun s d -> (s, d) :: scan_loop ()) 
	  with 
	    Scanf.Scan_failure _ | Failure _ | End_of_file -> [] in
	e :: scan_loop ()
      with
	Scanf.Scan_failure _ | Failure _ | End_of_file ->
	  let s = Scanf.bscanf scanbuf "1,[%S;" (fun s -> s) in
	  let rec scan_loop () =
	    try Scanf.bscanf scanbuf "%S;" 
		(fun s -> (s, 0) :: scan_loop ()) 
	    with 
	      Scanf.Scan_failure _ | Failure _ | End_of_file -> [] in
	  (s, 0) :: scan_loop ()
		     
let encode_entry_list entry_list =
  let b = Buffer.create 0 in
  Buffer.add_string b "2,[";
  List.iter (fun (s, d) -> Printf.bprintf b "%S,%d;" s d) entry_list;
  Buffer.add_string b "]";
  Buffer.contents b
	
let current_count () =
  try
    let scanbf = Scanf.Scanning.from_string (Gdbm.find db "") in
    Scanf.bscanf scanbf "2,%d" (fun d->d)
  with Not_found -> 0

let incr_global_count () =
  critical (fun () ->
    let n = current_count () in
    Gdbm.replace db "" (Printf.sprintf "2,%d" (n+1)))

(* 
   w:old weight
   w':new weight
   total:ǡ
   p = exp(-w/c)
   w = -c*log(p)
   w' = -c*log {(p*total+1)/(1+total)}
      = -c*log (p+1/total) + c*log(1+1/total)
      = w - c/total*exp(w/c) + c/total

total = 10000
-c*log(1/10000)=4000
c=434
 *)

let incr_mrph mrph =
  let w = Chasen.weight mrph in
  let w' = 
    let float_w = float_of_int w in
    let c = Config.weight_scale in
    let total = Config.total_count_ipa in
    let float_w' = float_w -. c /. total *. (exp (float_w /. c) -. 1.0) in
    int_of_float float_w' in
  let w' = 
    if w' < 0 then 0
    else if w' > Config.max_weight then Config.max_weight 
    else if w - w' > w/2 then w/2 else w
 in
  Chasen.set_weight mrph w'

let add mrph =
  let keyword = Chasen.keyword mrph in
  (* ʸΥɤ̤Ū˻Ȥ*)
  if keyword = "" then failwith "PersonalDict.add" else
  critical (fun () ->
    let count = current_count () in
    let entry_list = get_entry_list keyword in
    let find_entry (s, _) = 
      let m = Chasen.deserialize_mrph s in
      Chasen.compare_mrph mrph m = 0 in
    let new_entries =
      if List.exists find_entry entry_list then entry_list else
      (Chasen.serialize_mrph mrph, count) :: entry_list in
    let string = encode_entry_list new_entries in
    Gdbm.replace db keyword string)

let incr_count mrph =
  let keyword = Chasen.keyword mrph in
  (* ʸΥɤ̤Ū˻Ȥ*)
  if keyword = "" then failwith "PersonalDict.incr_count" else
  let p (s, d) =
    let mrph' = Chasen.deserialize_mrph s in
    Chasen.compare_mrph mrph mrph' = 0 in
  critical (fun () ->
    let count = current_count () in
    let entry_list = get_entry_list keyword in
    let entry_list =
      match List.partition p entry_list with
	[], r -> (Chasen.serialize_mrph mrph, count) :: r
      | (s, d) :: e, r ->
	  if e <> [] then Printf.eprintf "Corrupted entry : %s" keyword;
	  let m = Chasen.deserialize_mrph s in
	  incr_mrph m;
	  (Chasen.serialize_mrph m, count) :: r in
    let string = encode_entry_list entry_list in
    Gdbm.replace db keyword string)
  
let remove mrph =
  let keyword = Chasen.keyword mrph in
  let p (s, d) =
    let mrph' = Chasen.deserialize_mrph s in
    Chasen.compare_mrph mrph mrph' <> 0 in
  critical (fun () ->
    let entry_list = get_entry_list keyword in
    let entry_list = List.filter p entry_list in
    match entry_list with
      [] ->
	(try Gdbm.remove db keyword with Gdbm.Dbm_error "dbm_delete" -> ())
    | _ ->
	Gdbm.replace db keyword 
	  (encode_entry_list entry_list))
  
let lookup string pos len =
  let key = EucString.sub string ~pos ~len in
  let entries = get_entry_list key in
  let f (s, c) = (Chasen.deserialize_mrph s, c) in
  List.map f entries

let rec lookup_prefix string pos len =
  if len <= 0 then [] else
  (List.map (fun (m, d) -> (m, d, len)) (lookup string pos len)) @
  (lookup_prefix string pos (len - 1))

