<?php

/*	umineko
	Japanese verb stemmer
*/

// The five vowel columns as bare-word constants, so code in this file can
// write transform($word, u) or compare against e without quoting the letter.
define ("a", "a");
define ("e", "e");
define ("i", "i");
define ("o", "o");
define ("u", "u");

// Labels for the inflections a verb form can carry. Each constant's value is
// the human-readable name of that inflection.
// NOTE(review): presumably these are the values handed to verb::add() — confirm against callers.

// Politeness level.
define("V_POLITE", "polite");
define("V_PLAIN", "plain");

// Tense. Only the past is labelled; the non-past label below is commented out.
// define("V_NONPAST", "non-past"); // really now.
define("V_PAST", "past");

// Polarity.
define("V_NEGATIVE", "negative");

// Connective te-form.
define("V_TE_FORM", "te-form");

// Voice.
define("V_POTENTIAL", "potential");
define("V_CAUSATIVE", "causative");
define("V_PASSIVE", "passive");
define("V_CAUSPASS", "causative-passive"); /* Because of a quirk of the last_two deinflector, type-2 verbs get V_POTENTIAL_PASSIVE and V_CAUSATIVE. */
define("V_POTENTIAL_PASSIVE", "potential/passive (type II)");

// Conditionals.
define("V_CONDITIONAL", "conditional"); // -tara form (たら)
define("V_PROVISIONAL", "provisional"); // -ba form (ば)

// Moods.
define("V_VOLITIONAL", "volitional"); // e.g. -mashou (ましょう)
define("V_CONJECTURAL", "conjectural"); // darou, deshou (だろう、でしょう)
define("V_ALTERNATIVE", "alternative"); // -tari form (たり)
define("V_IMPERATIVE", "imperative"); // -nasai (なさい) and related forms

define("V_DESIDERATIVE", "desiderative"); // -tai form (たい)



/**
 * Accumulates the list of state labels attached to one verb form.
 *
 * NOTE(review): the labels are presumably the V_* constants — confirm against callers.
 */
class verb {
	/** Labels collected so far, in the order they were added. */
	public $state = array();

	/**
	 * Appends one or more labels.
	 *
	 * Every argument passed is appended, not only the first, so
	 * add(V_POLITE, V_PAST) records both.
	 */
	public function add($new_state) {
		$this->state = array_merge($this->state, func_get_args());
		}

	/**
	 * Renders the labels as a comma-separated list, e.g. "polite, past".
	 *
	 * Returns an empty string when no label has been added. implode() is
	 * used instead of appending to an uninitialised buffer and trimming the
	 * trailing ", ": that approach raised an undefined-variable warning and,
	 * before PHP 8, made substr() return false on an empty list, which is a
	 * fatal error inside __toString().
	 */
	public function __toString() {
		return implode(", ", $this->state);
		}
	}

// Maps a hiragana to the one-letter code of its row (its consonant).
// transform() uses this to find which row the final kana of a word belongs to.
// Only the rows listed here have entries: there are none for や/ゆ/よ, わ/を,
// ん or the small kana.
// NOTE(review): stems ending in わ (e.g. the negative stem of verbs ending in う)
// have no entry — confirm whether that case is handled elsewhere.
$kana_to_consonant = array(
	"あ" => "v", "い" => "v", "う" => "v", "え" => "v", "お" => "v", // bare vowels use the placeholder "v" (v for vowel)
	"か" => "k", "き" => "k", "く" => "k", "け" => "k", "こ" => "k", 
	"さ" => "s", "し" => "s", "す" => "s", "せ" => "s", "そ" => "s", 
	"た" => "t", "ち" => "t", "つ" => "t", "て" => "t", "と" => "t", 
	"な" => "n", "に" => "n", "ぬ" => "n", "ね" => "n", "の" => "n", 
	"は" => "h", "ひ" => "h", "ふ" => "h", "へ" => "h", "ほ" => "h", 
	"ま" => "m", "み" => "m", "む" => "m", "め" => "m", "も" => "m", 
	"ら" => "r", "り" => "r", "る" => "r", "れ" => "r", "ろ" => "r", // r-row
	"が" => "g", "ぎ" => "g", "ぐ" => "g", "げ" => "g", "ご" => "g", 
	"ざ" => "z", "じ" => "z", "ず" => "z", "ぜ" => "z", "ぞ" => "z", 
	"だ" => "d", "ぢ" => "d", "づ" => "d", "で" => "d", "ど" => "d", 
	"ば" => "b", "び" => "b", "ぶ" => "b", "べ" => "b", "ぼ" => "b", 
	"ぱ" => "p", "ぴ" => "p", "ぷ" => "p", "ぺ" => "p", "ぽ" => "p"
	);
	
// Maps a hiragana to a two-letter code: row letter followed by vowel.
// The spellings are fully regular (si, ti, tu, hu, zi, di, du) so that the
// last letter is always the vowel; unwind() relies on that to read the vowel
// of a word's final kana.
$kana_to_romaji = array(
	"あ" => "va", "い" => "vi", "う" => "vu", "え" => "ve", "お" => "vo", // bare vowels use the placeholder "v" (v for vowel)
	"か" => "ka", "き" => "ki", "く" => "ku", "け" => "ke", "こ" => "ko", 
	"さ" => "sa", "し" => "si", "す" => "su", "せ" => "se", "そ" => "so", 
	"た" => "ta", "ち" => "ti", "つ" => "tu", "て" => "te", "と" => "to", 
	"な" => "na", "に" => "ni", "ぬ" => "nu", "ね" => "ne", "の" => "no", 
	"は" => "ha", "ひ" => "hi", "ふ" => "hu", "へ" => "he", "ほ" => "ho", 
	"ま" => "ma", "み" => "mi", "む" => "mu", "め" => "me", "も" => "mo", 
	"ら" => "ra", "り" => "ri", "る" => "ru", "れ" => "re", "ろ" => "ro", // r-row
	"が" => "ga", "ぎ" => "gi", "ぐ" => "gu", "げ" => "ge", "ご" => "go", 
	"ざ" => "za", "じ" => "zi", "ず" => "zu", "ぜ" => "ze", "ぞ" => "zo", 
	"だ" => "da", "ぢ" => "di", "づ" => "du", "で" => "de", "ど" => "do", 
	"ば" => "ba", "び" => "bi", "ぶ" => "bu", "べ" => "be", "ぼ" => "bo", 
	"ぱ" => "pa", "ぴ" => "pi", "ぷ" => "pu", "ぺ" => "pe", "ぽ" => "po"
	);
	
// Inverse of $kana_to_romaji: maps a two-letter code (row letter + vowel)
// back to its hiragana. transform() uses this to build the replacement kana
// from a row letter and a target vowel.
$romaji_to_kana = array(
	"va" => "あ", "vi" => "い", "vu" => "う", "ve" => "え", "vo" => "お", // bare vowels use the placeholder "v" (v for vowel)
	"ka" => "か", "ki" => "き", "ku" => "く", "ke" => "け", "ko" => "こ", 
	"sa" => "さ", "si" => "し", "su" => "す", "se" => "せ", "so" => "そ", 
	"ta" => "た", "ti" => "ち", "tu" => "つ", "te" => "て", "to" => "と", 
	"na" => "な", "ni" => "に", "nu" => "ぬ", "ne" => "ね", "no" => "の", 
	"ha" => "は", "hi" => "ひ", "hu" => "ふ", "he" => "へ", "ho" => "ほ", 
	"ma" => "ま", "mi" => "み", "mu" => "む", "me" => "め", "mo" => "も", 
	"ra" => "ら", "ri" => "り", "ru" => "る", "re" => "れ", "ro" => "ろ", // r-row
	"ga" => "が", "gi" => "ぎ", "gu" => "ぐ", "ge" => "げ", "go" => "ご", 
	"za" => "ざ", "zi" => "じ", "zu" => "ず", "ze" => "ぜ", "zo" => "ぞ", 
	"da" => "だ", "di" => "ぢ", "du" => "づ", "de" => "で", "do" => "ど", 
	"ba" => "ば", "bi" => "び", "bu" => "ぶ", "be" => "べ", "bo" => "ぼ", 
	"pa" => "ぱ", "pi" => "ぴ", "pu" => "ぷ", "pe" => "ぺ", "po" => "ぽ"
	);
	
/**
 * Parses the text of a rule file into a map of pattern => code.
 *
 * Rules are separated by "--". Within a rule, everything before the first
 * "{" is the pattern and everything between the first "{" and the last "}"
 * is the code. Newlines are removed and both parts are trimmed.
 *
 * $rulefile is the file's contents, not a path. Returns an array keyed by
 * pattern; a later rule with the same pattern replaces an earlier one.
 */
function load_rules ($rulefile) {
	$rules = array();
	foreach (explode("--", $rulefile) as $chunk) {
		// Strip newlines so "." in the body pattern below spans the whole rule.
		$chunk = str_replace("\n", "", $chunk);

		// A chunk without "{" carries no rule at all (typically the blank
		// text around a leading or trailing "--"). Skip it rather than
		// registering an empty pattern with empty code.
		$brace = strpos($chunk, "{");
		if ($brace === false) continue;

		$regex = trim(substr($chunk, 0, $brace));
		// Greedy on purpose: the body runs to the LAST "}" so braces inside
		// the code are kept. A missing "}" yields an empty body.
		$code = preg_match('/\{(.*)\}/', $chunk, $matches) ? trim($matches[1]) : "";
		$rules[$regex] = $code;
		}
	return $rules;
	}
	
/**
 * Looks for an entry whose headword is exactly $word in the global $edict text.
 *
 * An entry is recognised as a newline, the word, then a space. Returns the
 * byte offset of that newline, or false when there is no such entry. The
 * offset can be 0, so callers must test the result with !== false.
 */
function in_edict ($word) {
	global $edict;
	$needle = "\n" . $word . " ";
	return strpos($edict, $needle);
	}
	
/**
 * Returns the last $length characters of $word (the last one by default).
 *
 * Characters are counted as UTF-8 code points. A plain substr() would
 * return the final byte of a multi-byte kana, which never matches a key of
 * the kana lookup tables.
 */
function tail ($word, $length = 1) {
	return mb_substr((string) $word, -$length, null, "UTF-8");
	}
	
/**
 * Returns $word with its last $length characters removed (one by default).
 *
 * Characters are counted as UTF-8 code points, so a whole kana is dropped
 * rather than a single byte of it. Asking to drop more characters than the
 * word has yields an empty string.
 */
function head ($word, $length = 1) {
	$word = (string) $word;
	// Clamp at zero: a negative length would be read by mb_substr() as
	// "stop that many characters before the end" and leave text behind.
	$keep = max(0, mb_strlen($word, "UTF-8") - $length);
	return mb_substr($word, 0, $keep, "UTF-8");
	}
	
/**
 * Moves the final kana of $word to another vowel column of the same row.
 *
 * $to is the target vowel letter (a, i, u, e or o). For example, a word
 * ending in き with $to = u comes back ending in く. The row is looked up in
 * $kana_to_consonant and the replacement kana in $romaji_to_kana.
 */
function transform ($word, $to) {
	global $kana_to_consonant, $romaji_to_kana;
	$last_kana = tail($word);
	$row = $kana_to_consonant[$last_kana];
	return head($word) . $romaji_to_kana[$row . $to];
	}
	
/**
 * Returns the vowel letter (a, i, u, e or o) of the final kana of $word.
 *
 * The final kana is converted through $kana_to_romaji and the last letter
 * of that code is returned.
 */
function unwind ($word) {
	// Only the romaji table is needed; the consonant table was declared
	// here before but never read.
	global $kana_to_romaji;
	$tail = tail($word);
	return tail($kana_to_romaji[$tail]);
	}
	
/**
 * Tells make_plain() whether a chopped stem gets its plain form by
 * appending る rather than by shifting its final kana to the u-column.
 *
 * True when the stem's final kana is in the e-column, or when the stem is a
 * single character.
 * NOTE(review): presumably this targets type-II verbs and one-character
 * stems — confirm; stems ending in the i-column are not covered here.
 */
function is_irreg ($chopped_word) {
	// a chopped word: the ending has already been removed and the stem must
	// be turned back into a normal word before it is fed back in
	if (unwind($chopped_word) === e) return true;
	// Count characters, not bytes: one kana or kanji is several bytes in
	// UTF-8, so a byte count is never 1 for a one-character stem.
	return mb_strlen((string) $chopped_word, "UTF-8") == 1;
	}
	
/**
 * Rebuilds a plain form from a chopped stem.
 *
 * Stems flagged by is_irreg() get る appended; every other stem has its
 * final kana moved to the u-column.
 */
function make_plain ($word) {
	return is_irreg($word) ? $word . "る" : transform($word, u);
	}
	
/**
 * Returns the first line of the global $edict text that starts with $word
 * and has at least one more character after it, without the leading newline.
 *
 * This is a prefix match: unlike in_edict(), it does not require a space
 * after the word. Returns an empty string when no line matches.
 */
function find_edict ($word) {
	global $edict;
	// Quote the word so characters such as "." or "/" are matched literally
	// instead of being read as regex syntax or ending the pattern early.
	$pattern = "/\n" . preg_quote($word, "/") . ".+/";
	if (!preg_match($pattern, $edict, $found)) return "";
	// Drop the newline that anchored the match to the start of a line.
	return substr($found[0], 1);
	}
	
?>