# encoding: utf-8
#Copyright (C) 2012 J.r0ck <j69@ar156.dip.jp>

#
# 人工無能 TwitAngela5
# Webでキーワードに関し検索を行うクラス
#

module TwAngela

  # Searches web services for a keyword: Japanese Wikipedia, Hatena Keyword,
  # Twitter search and the Microsoft Translator HTTP API.
  #
  # Every public method logs through @@Log, which is not defined in this file
  # and must be set up elsewhere (presumably by the TwAngela module -- confirm).
  # Hpricot, Mechanize, JSON, open-uri, Net::HTTP, REXML and String#toutf8
  # (kconv) are used here but not required here, so the caller must already
  # have loaded them.
  class Crawler
    include TwAngela

    # How many times crawlTwitter runs the search before giving up.
    TWITTER_MAX_ATTEMPTS = 3

    # Application id sent with every Translator request.
    # NOTE(review): this is a credential committed to source; consider loading
    # it from configuration instead.
    TRANSLATE_APP_ID = "A4ADCE243513A42B824777871685B5E50E33356D"

    def initialize()
    end

    # Collects text from the Japanese Wikipedia page for +keyword+.
    #
    # Text is taken, in this order, from every <p>, every <dd>, and every <li>
    # matched by the XPath below. Each string is passed through toutf8 and
    # stripped.
    #
    # @param keyword [String] page title; percent-encoded into the URL
    # @return [Array<String>] collected text; empty when the request fails
    #   with an HTTP error (OpenURI::HTTPError)
    # @return [nil] when any other error occurs (the error is logged)
    def crawlWiki(keyword)
      url = "http://ja.wikipedia.org/wiki/" + percent_encode(keyword)
      @@Log.info("Wikipadia検索中... " + keyword + " " + url)

      sentence = []
      begin
        doc = fetch_document(url)
        sentence.concat(inner_texts(doc/:p))
        sentence.concat(inner_texts(doc/:dd))
        sentence.concat(inner_texts(doc.search("//div[@class='mw-content-ltr']//li[@id='']")))
      rescue OpenURI::HTTPError
        # Deliberately silent: an HTTP error status is reported to the caller
        # as "nothing found" (an empty list), not as a failure.
      rescue => err
        @@Log.warn("Wiki検索でエラーが発生しました。")
        @@Log.warn(err)
        return nil
      end
      sentence
    end

    # Collects text from the Hatena Keyword page for +keyword+.
    #
    # Text is taken from every <p> directly inside a <div class="section">.
    # Each string is passed through toutf8 and stripped.
    #
    # @param keyword [String] keyword; percent-encoded into the URL
    # @return [Array<String>] collected text; empty when the request fails
    #   with an HTTP error (OpenURI::HTTPError)
    # @return [nil] when any other error occurs (the error is logged)
    def crawlHatena(keyword)
      url = "http://d.hatena.ne.jp/keyword/" + percent_encode(keyword)
      @@Log.info("Hatena検索中... " + keyword + " " + url)

      sentence = []
      begin
        doc = fetch_document(url)
        sentence.concat(inner_texts(doc.search("//div[@class='section']/p")))
      rescue OpenURI::HTTPError
        # Deliberately silent: see crawlWiki -- HTTP errors yield an empty list.
      rescue => err
        @@Log.warn("Hatena検索でエラーが発生しました。")
        @@Log.warn(err)
        return nil
      end
      sentence
    end

    # Searches Twitter for +keyword+ and returns the plain text of each hit.
    #
    # The request is attempted up to TWITTER_MAX_ATTEMPTS times. Each attempt
    # builds its own result list, so a partly processed failed attempt never
    # leaks duplicates into the list returned by a later successful one.
    #
    # NOTE(review): search.twitter.com/search.json is the old unauthenticated
    # search endpoint -- verify that it is still served.
    #
    # @param keyword [String] search terms; percent-encoded into the query
    # @return [Array<String>] text of each result, markup removed by Hpricot
    # @return [nil] when every attempt failed (each failure is logged)
    def crawlTwitter(keyword)
      url = "http://search.twitter.com/search.json?lang=ja&rpp=100&q=" + percent_encode(keyword)
      @@Log.info("Twitter検索…. " + keyword + " " + url)

      TWITTER_MAX_ATTEMPTS.times do
        begin
          page = Mechanize.new.get(url)
          # fetch makes a response without 'results' fail with a clear KeyError.
          results = JSON.parse(page.body).fetch('results')
          return results.map { |result|
            Hpricot(result['text'], :xhtml_strict => true).to_plain_text
          }
        rescue => err
          @@Log.warn("Twitter検索でエラーが発生しました。")
          @@Log.warn(err)
        end
      end
      nil
    end

    # Translates +word+ with the Microsoft Translator V2 HTTP API.
    #
    # @param word [String] text to translate
    # @param from [String] source language code
    # @param to [String] target language code
    # @return [String, nil] text of the root element of the XML response, or
    #   nil when the request or parsing fails (the error is logged)
    def crawlTranslate(word, from="ja", to="en")
      url = "/V2/Http.svc/Translate?appid=" + TRANSLATE_APP_ID +
            "&from=" + percent_encode(from) +
            "&to=" + percent_encode(to) +
            "&text=" + percent_encode(word)

      @@Log.info("変換中... " + word + " " + url)
      response = Net::HTTP.new("api.microsofttranslator.com").get(url)
      # Judge success by status class; the reason phrase is free-form text.
      raise "#{response.message}" unless response.is_a?(Net::HTTPSuccess)

      REXML::Document.new(response.body).root.text
    rescue => err
      @@Log.warn("Microsoftの翻訳APIでエラーが発生しました。")
      @@Log.warn(err)
      nil
    end

    private

    # Percent-encodes +text+ so it can be embedded as a single path segment or
    # query value. Reserved characters such as & = ? / are escaped and spaces
    # become %20.
    def percent_encode(text)
      URI.encode_www_form_component(text).gsub('+', '%20')
    end

    # Downloads +url+ via open-uri and parses the body, transcoded to UTF-8,
    # with Hpricot. URI#read is used rather than Kernel#open so the call works
    # on Ruby versions where Kernel#open no longer accepts URLs.
    def fetch_document(url)
      Hpricot(URI.parse(url).read.encode('UTF-8'))
    end

    # Returns the stripped UTF-8 inner text of each element, skipping elements
    # whose inner_text is nil.
    def inner_texts(elements)
      texts = []
      elements.each do |element|
        text = element.inner_text
        texts.push(text.toutf8.strip) unless text.nil?
      end
      texts
    end

  end

end
