Feed::mixi_keyword_ranking作りました。

先日mixiに実装された日記キーワードランキング。毎日内容が変わって面白そうなので、Praggerでキーワードの部分だけを抽出するプラグインを書きました。mixiに登録しているメールアドレスとパスワードをYAMLの設定に書けば取得できます。日付の指定は省略可能です。

以下にソースを書きます。

# Feed::mixi_keyword_ranking - Custom Feed of mixi keyword ranking - Meguu
#
# - module: Feed::mixi_keyword_ranking
#   config:
#     email: your_email
#     password: your_password
#     date: yyyy/mm/dd [option]
#

require 'net/http'
require 'kconv'
require 'rss/maker'

# Pragger plugin entry point: logs in to mixi and returns the diary keyword
# ranking as a list of feed items.
#
# config - Hash with the keys:
#          'email'    - mixi login e-mail address
#          'password' - mixi login password
#          'date'     - optional "yyyy/mm/dd" string selecting the ranking day;
#                       when absent the request carries no date parameters
# data   - incoming pipeline data; it is not read by this plugin
#
# Returns whatever MixiReader#ranking returns for the fetched page.
def mixi_keyword_ranking(config, data)
  # Use the `nil` literal: the top-level NIL constant is gone from modern
  # Ruby and referencing it raises NameError.
  date = config['date'] ? config['date'].split("/").map { |part| part.to_i } : nil
  day = date ? Time.mktime(date[0], date[1], date[2]) : nil

  reader = MixiReader.new(config['email'], config['password'])
  reader.keyword_ranking_html(day)
  reader.ranking
end

# Small mixi client: logs in with an e-mail/password pair, fetches the diary
# keyword ranking page and turns the keyword links into RSS::RDF::Item objects.
class MixiReader

  HOST = "mixi.jp".freeze
  LOGIN_PATH = "/login.pl".freeze
  HOME_PATH = "/home.pl".freeze
  KEYWORD_RANKING_PATH = "/keyword_ranking.pl".freeze

  # Matches one keyword link on the ranking page. The style attribute is
  # optional so that both the styled and the plain link variants are found.
  # Capture 1 is the relative link, capture 2 the keyword text.
  KEYWORD_LINK = %r{<a href="(search_diary\.pl\?submit=search&keyword=[^&]+&mode=rank)"(?: style="font-size:11pt;")?>([^<]+)</a>}

  # Stores the credentials and logs in immediately.
  #
  # email    - mixi login e-mail address
  # password - mixi login password
  def initialize(email, password)
    Net::HTTP.version_1_2
    @email = email
    @password = password
    login
  end

  # Posts the credentials to the login page and keeps the session cookies
  # from the response in @cookie.
  #
  # Raises RuntimeError when the response carries no Set-Cookie header.
  #
  # NOTE(review): the credentials are sent over plain HTTP on port 80 -
  # confirm whether the site expects HTTPS.
  def login
    Net::HTTP.start(HOST, 80) do |http|
      req = Net::HTTP::Post.new(LOGIN_PATH)
      query = "next_url=#{ue(HOME_PATH)}&email=#{ue(@email)}&password=#{ue(@password)}"
      response = http.request(req, query)
      cookiereq = response['set-cookie']
      # Fail with a clear message instead of a NoMethodError on nil.
      raise "mixi login failed: no Set-Cookie header in the response" unless cookiereq
      @cookie = pcookie(cookiereq)
    end
  end

  # Extracts the BF_SESSION and BF_STAMP pairs from a Set-Cookie header value.
  #
  # cookiereq - String value of the Set-Cookie header
  #
  # Returns the two "name=value" pairs joined with "; "; a cookie that is not
  # present contributes an empty string.
  def pcookie(cookiereq)
    %w[BF_SESSION BF_STAMP].map { |name| cookiereq[/#{name}=[^;]+/].to_s }.join("; ")
  end

  # Fetches the keyword ranking page and stores it, converted to UTF-8, in
  # @html.
  #
  # date - optional Time selecting the ranking day; when nil the page is
  #        requested without date parameters
  #
  # Returns the UTF-8 page body.
  def keyword_ranking_html(date = nil)
    @date = date
    Net::HTTP.start(HOST, 80) do |http|
      # %Y emits the full year instead of hard-coding the century as "20%y".
      query = date ? date.strftime("?year=%Y&month=%m&day=%d") : ""
      req = Net::HTTP::Get.new(KEYWORD_RANKING_PATH + query)
      req["Cookie"] = @cookie
      response = http.request(req)
      @html = response.body.toutf8
    end
  end

  # Builds one RSS::RDF::Item per keyword link found in the fetched page.
  # Each item gets the keyword as title, the absolute search URL as link and,
  # when a date was requested, that date.
  #
  # Returns an Array of items in page order; empty when nothing was fetched
  # or no link matched.
  def ranking
    items = []
    (@html || "").each_line do |line|
      line.scan(KEYWORD_LINK) do |path, keyword|
        item = RSS::RDF::Item.new
        item.title = keyword
        item.link = "http://#{HOST}/#{path}"
        item.date = @date if @date
        items << item
      end
    end
    items
  end

  # URL-encodes a string for use in a form-encoded request body.
  #
  # string - String to encode
  #
  # Returns the string with every byte outside [a-zA-Z0-9_.-] percent-encoded
  # and spaces turned into "+".
  def ue(string)
    string.gsub(/[^ a-zA-Z0-9_.-]+/) do |run|
      # bytesize, not size: a multibyte character needs one %XX per byte.
      '%' + run.unpack('H2' * run.bytesize).join('%').upcase
    end.tr(' ', '+')
  end
end