# encoding: utf-8
#Copyright (C) 2012 J.r0ck <j69@ar156.dip.jp>

#
# 人工無能 TwitAngela5
# マルコフ連鎖により文字列を生成するクラス
#

module TwAngela

  class Markov
    include TwAngela

    def initialize()
    end

    def parse(str)
      str = '' unless str
      return str if str.empty?

      data = wakati(str)
      return '' unless data[0]

      t1 = forEnglish(data[0]['head'])
      t2 = forEnglish(data[0]['middle'])
      new_text = t1 + t2
      
      while true
        _a = Array.new
        data.each do |hash|
          _a << hash if hash['head'] == t1 && hash['middle'] == t2
        end 
       
        break if _a.size == 0
        num = rand(_a.size)
        new_text = new_text + forEnglish(_a[num]['end'])
        break if _a[num]['end'] == "EOS"
        t1 = forEnglish(_a[num]['middle'])
        t2 = forEnglish(_a[num]['end'])
      end

      new_text.gsub!(/(EOS$)|(EOS $)/,'')
      new_text.force_encoding("utf-8")
      new_text = '' unless new_text
      
      unless new_text.empty?
        mecab = MecabHelper.new
        mecab.analyze(new_text)
        words = mecab.words
        words.each do |w|
          if w.feature[0] == '記号' || w.feature[0] == '接続詞' || w.feature[0] =~ /^助/ || w.feature[1] == '接尾' || w.feature[7] == 'サン'
            new_text = new_text.sub(w.token, '')
          else
            break
          end
        end
      end

      return new_text
    end
    
    def wakati(str)
      data = []

      mecab = MecabHelper.new
      words = mecab.analyze(str)
      
      wkt = []
      pre = ''
      words.each do |w|
        if w.feature[0] == '名詞'
          if pre.empty? && w.feature[1] == '接尾' && (w.token == 'ちゃん' || w.token == 'くん' || w.token == 'さん')
            dummy = [ 'うさぎさん', 'かめさん', 'だるまさん', ]
            pre << dummy[rand(dummy.length)]
          else
            pre << w.token
          end
        elsif w.feature[0] == '接頭詞' && w.feature[1] == '名詞接続'
          pre << w.token
        else
          unless pre.empty?
            wkt << pre
            pre = ''
          end
          wkt << w.token
        end
      end
      
      wkt << 'EOS'
      
      wkt.each_cons(3) do |a|
        h = { 'head' => a[0], 'middle' => a[1], 'end' => a[2] }
        data.push(h)
      end
      
      return data
    end
    private :wakati

  end
  
end
