# encoding: utf-8
#Copyright (C) 2012 J.r0ck <j69@ar156.dip.jp>

#
# 人工無能 TwitAngela5
# 辞書を作成するクラス
#

module TwAngela

  # Builds the keyword list ("dictionary") the bot works from, either from a
  # dumped timeline file or from a single mention.
  #
  # The class relies on class variables (@@CRAWFILE, @@TLFILE, @@DATAROW,
  # @@DATAFILE, @@KEYWORDS, @@DEBUGMODE) and on the helpers removeTags /
  # writeFile, none of which are defined in this file; presumably they come
  # from the TwAngela module that is mixed in below.
  class DictFactory
    include TwAngela

    # Full-width (ideographic) space, U+3000. It is replaced by an ASCII space
    # before the text is handed to the morphological analyzer.
    FULLWIDTH_SPACE = "\u3000"

    # Zero-based index of the field read from each tab-separated timeline row.
    TEXT_COLUMN = 3

    # A stored per-user keyword list with this many entries or more is not
    # merged into the fresh keywords; it is simply overwritten.
    MAX_STORED_KEYWORDS = 50

    # Keywords produced by the most recent analyze* call (an empty Array
    # until one of them has run).
    attr_accessor :keywords

    def initialize
      @keywords = []
    end

    # Rebuilds @keywords from the timeline file.
    #
    # Unless @@DEBUGMODE is set, the timeline is first dumped to @@TLFILE via
    # TwitterHelper. In debug mode the existing @@TLFILE is reused and the
    # cleaned rows are additionally written to @@DATAFILE.
    #
    # @return [Object] whatever MecabHelper#keywors returns (also stored in
    #   @keywords)
    def analyzeTimeline
      remove_crawl_file

      # The helper is instantiated even in debug mode, as before.
      tw = TwitterHelper.new
      tw.dumpTimeline(@@TLFILE, @@DATAROW) unless @@DEBUGMODE

      data = read_timeline_tokens

      # Keep a copy of the timeline data that was actually analyzed.
      writeFile(@@DATAFILE, data, 'w') if @@DEBUGMODE

      mecab = MecabHelper.new
      mecab.analyze(data.join(' '))
      @keywords = mecab.keywors
    end

    # Rebuilds @keywords from a single mention and merges them with the
    # keywords previously stored for the same user in @@KEYWORDS.
    #
    # @param statusObject [#token, #user, #name] the mention to analyze
    # @return [Array] @keywords, with statusObject.name appended
    def analyzeMentions(statusObject)
      remove_crawl_file

      mecab = MecabHelper.new
      mecab.analyze(normalize_text(statusObject.token))
      @keywords = mecab.keywors_for_replay

      store = load_keyword_store
      previous = store[statusObject.user]
      @keywords.concat(previous) if previous && previous.length < MAX_STORED_KEYWORDS
      @keywords.uniq!

      store[statusObject.user] = @keywords
      save_keyword_store(store)

      # Appended only after the store has been written, so the name is part of
      # the in-memory keywords but not of the persisted list.
      @keywords << statusObject.name
    end

    private

    # Deletes the crawl file if it exists.
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    def remove_crawl_file
      File.delete(@@CRAWFILE) if File.exist?(@@CRAWFILE)
    end

    # Strips tags via removeTags and turns full-width spaces into ASCII spaces.
    def normalize_text(text)
      removeTags(text).tr(FULLWIDTH_SPACE, ' ')
    end

    # Reads @@TLFILE and returns the cleaned text of every row that is not
    # flagged by SpamChecker. Blank rows and rows that have no TEXT_COLUMN
    # field are skipped instead of passing nil on to the helpers.
    def read_timeline_tokens
      spam_checker = SpamChecker.new
      tokens = []
      File.foreach(@@TLFILE) do |line|
        text = line.strip.split(/\s*\t\s*/)[TEXT_COLUMN]
        next if text.nil? || spam_checker.checkSpam(text)

        token = normalize_text(text)
        tokens << token unless token.empty?
      end
      tokens
    end

    # Loads the user => keywords mapping from @@KEYWORDS, or returns an empty
    # Hash when the file does not exist yet.
    def load_keyword_store
      return {} unless File.exist?(@@KEYWORDS)

      # NOTE(review): Marshal.load must never see untrusted data. This assumes
      # @@KEYWORDS is only ever written by save_keyword_store - confirm.
      File.open(@@KEYWORDS, 'rb') { |file| Marshal.load(file) }
    end

    # Persists the user => keywords mapping to @@KEYWORDS.
    def save_keyword_store(store)
      File.open(@@KEYWORDS, 'wb') { |file| Marshal.dump(store, file) }
    end
  end
end

