# encoding: utf-8
#Copyright (C) 2012 J.r0ck <j69@ar156.dip.jp>

#
# 人工無能 TwitAngela4
# mecabユーティリティ
#

module TwAngela

  # Helper around a MeCab tagger: tokenises text into linked Word objects,
  # counts runs of consecutive noun-like tokens and picks keywords from them.
  #
  # MeCab::Tagger and Word are not defined in this file; they must be loaded
  # by the caller before an instance is created or #analyze is called.
  class MecabHelper
    include TwAngela

    # Noun sub-categories (second feature field) that #selectKeywords rejects:
    # pronoun, non-independent, suffix.
    EXCLUDED_NOUN_KINDS = ['代名詞', '非自立', '接尾'].freeze

    # Words produced by the most recent #analyze call (nil before the first call).
    attr_accessor :words

    # @param parm [String] option string handed to MeCab::Tagger.new
    def initialize(parm='-O chasen')
      @parm = parm
      @mecab = MeCab::Tagger.new(@parm)
      # Start with an empty table so the keyword methods return [] instead of
      # raising when they are called before #analyze.
      @nouns = {}
    end

    # Runs the tagger over +str+, but only when this helper was created with
    # the exact option string '-O wakati'; otherwise +str+ is returned as is.
    #
    # @param str [String] text to split
    # @return [String] tagger output re-tagged with the encoding of +str+,
    #   or +str+ itself for any other option string
    def parse(str)
      return str unless @parm == '-O wakati'

      @mecab.parse(str).force_encoding(str.encoding)
    end

    # Tokenises +str+ and rebuilds both the word list and the noun table.
    #
    # The input is split on whitespace and every chunk is tagged separately.
    # Runs of consecutive noun-like words are concatenated and counted in
    # @nouns, where the stored value is the number of occurrences minus one
    # (the first sighting stores 0).
    #
    # @param str [String] text to analyse
    # @return [Array<Word>] the words in input order, linked through next/pre
    def analyze(str)
      @words = []
      @nouns = {}

      str.split(/\s+/).each { |chunk| append_words(chunk, str.encoding) }
      count_noun_runs

      @words
    end

    # Picks keywords from the nouns seen by the last #analyze call.
    #
    # Looks for nouns whose stored count is 1, then 2, and so on up to 5,
    # stopping at the first count that has any noun. When none of those counts
    # match, nouns with a stored count of 0 are used instead.
    #
    # (The misspelt method name is kept for existing callers.)
    #
    # @return [Array<String>] accepted nouns in random order
    def keywors
      candidates = []

      (1..5).each do |count|
        candidates = longest_first(@nouns.select { |_noun, seen| seen == count })
        break unless candidates.empty?
      end
      candidates = longest_first(@nouns.select { |_noun, seen| seen == 0 }) if candidates.empty?

      selectKeywords(candidates)
    end

    # Picks keywords from every noun seen by the last #analyze call,
    # regardless of how often it occurred.
    #
    # @return [Array<String>] accepted nouns in random order
    def keywors_for_replay
      selectKeywords(longest_first(@nouns))
    end

    # Filters candidate nouns by re-tagging each one and inspecting its first
    # morpheme: it must be a noun ('名詞') whose sub-category is not listed in
    # EXCLUDED_NOUN_KINDS.
    #
    # @param alist [Array<Array(String, Integer)>] [noun, count] pairs
    # @return [Array<String>] the accepted nouns, shuffled
    def selectKeywords(alist)
      accepted = alist.select do |noun, _count|
        # The first node is skipped; presumably it is MeCab's
        # beginning-of-sentence sentinel.
        node = @mecab.parseToNode(noun).next
        next false unless node

        feature = node.feature.force_encoding(noun.encoding).split(/\s*,\s*/)
        feature[0] == '名詞' && !EXCLUDED_NOUN_KINDS.include?(feature[1])
      end

      accepted.map { |noun, _count| noun }.uniq.shuffle
    end

    private

    # Tags one whitespace-free chunk and appends its words to @words, linking
    # each new word to the previous one. Positions restart at 1 per chunk.
    def append_words(chunk, encoding)
      position = 1
      node = @mecab.parseToNode(chunk)

      while node
        surface = node.surface.force_encoding(encoding)
        # Nodes with an empty surface carry no text and are skipped
        # (presumably MeCab's BOS/EOS sentinels).
        unless surface.empty?
          feature = node.feature.force_encoding(encoding).split(/\s*,\s*/)
          word = Word.new(surface, feature, position)
          previous = @words.last
          if previous
            previous.next = word
            word.pre = previous
          end
          @words << word
          position += 1
        end
        node = node.next
      end
    end

    # Walks @words, joining consecutive noun-like tokens into one compound and
    # recording each compound in @nouns.
    def count_noun_runs
      run = []

      @words.each do |word|
        if noun_part?(word.feature)
          run << word.token
        else
          record_noun(run.join) unless run.empty?
          run = []
        end
      end

      # A run that reaches the end of the input has no following word to
      # trigger the flush above, so record it here.
      record_noun(run.join) unless run.empty?
    end

    # True for a noun ('名詞') that is a proper noun ('固有名詞') and for a
    # prefix ('接頭詞') that attaches to nouns ('名詞接続'). Either category
    # also qualifies whenever an eighth feature field is present.
    def noun_part?(feature)
      case feature[0]
      when '名詞' then feature[1] == '固有名詞' || feature[7]
      when '接頭詞' then feature[1] == '名詞接続' || feature[7]
      else false
      end
    end

    # Stores 0 for the first sighting of +noun+ and adds 1 for each repeat.
    def record_noun(noun)
      @nouns[noun] = @nouns.key?(noun) ? @nouns[noun] + 1 : 0
    end

    # Orders [noun, count] pairs by descending noun length.
    def longest_first(pairs)
      pairs.sort { |a, b| b[0].length <=> a[0].length }
    end
  end
end
