#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sys
import getpass
import otptools2
import time
import datetime
import sqlite3
import re

# Usage text printed (via sys.exit) when the command line is malformed.
usage = sys.argv[0] + """ start end <output_file>
"""

# Path of the SQLite database file used by every database helper below.
DATABASE = "database/story_dat"

# Both positional arguments are mandatory; bail out with the usage text
# when either one is missing.
try:
    start = sys.argv[1]
    end = sys.argv[2]
except IndexError:
    sys.exit(usage)

# main() converts both bounds with int(), so each of them has to consist of
# decimal digits only. (The original tested `start` twice and never `end`.)
if not (start.isdigit() and end.isdigit()):
    sys.exit(usage)

def login(otp, uname=""):
    """Log in interactively if needed, then persist the session cookie.

    When ``uname`` is the empty string, the user name and the password are
    read from the terminal and handed to ``otp.login``; the process exits
    with a message on Ctrl-C or when ``otp.login`` reports failure.
    When ``uname`` is anything else, nothing is prompted and no login
    attempt is made.  In both cases ``otp.save_cookie()`` is the last call.
    """
    if uname != "":
        # A user name was supplied: skip the interactive login entirely.
        otp.save_cookie()
        return

    try:
        uname = raw_input("user: ")
        passwd = getpass.getpass("login password:")
    except KeyboardInterrupt:
        # Ctrl-C at either prompt aborts the whole program.
        sys.exit("\nabort.")

    if not otp.login(uname, passwd):
        sys.exit("login error!")
    otp.save_cookie()

def prepare_database():
    """Create the story database and its tables if the file is missing.

    Does nothing when a file already exists at ``DATABASE``.  Otherwise the
    parent directory is created when necessary and a new database with the
    ``stories`` and ``topics`` tables is written.
    """
    if os.path.exists(DATABASE):
        return

    # sqlite3.connect() cannot create missing directories, so make sure the
    # directory that will hold the database file exists first.
    parent = os.path.dirname(DATABASE)
    if parent and not os.path.isdir(parent):
        os.makedirs(parent)

    con = sqlite3.connect(DATABASE)
    try:
        cur = con.cursor()

        # String defaults use single quotes: double-quoted string literals
        # are only accepted by SQLite as a legacy quirk.
        cur.execute("""create table stories (
  sid text unique,
  title text default '',
  author text default '',
  section text default '',
  topic text default '',
  view int default 0,
  comments int default 0,
  date real
)""")

        cur.execute("""create table topics (
  sid text,
  topic text
)""")

        con.commit()
        cur.close()
    finally:
        # Release the connection even when one of the statements fails.
        con.close()


def str2date(str):
    """Parse an ``MM/DD hh:mm`` timestamp ("01/01 01:01") into a datetime.

    The text carries no year, so the current year is assumed unless the
    parsed month is later than the current month, in which case the
    previous year is used.

    Args:
        str: text containing an ``MM/DD hh:mm`` timestamp somewhere in it.
            (The name shadows the builtin; it is kept so existing callers
            keep working.)

    Returns:
        A naive ``datetime.datetime``, or ``None`` when the text contains
        no timestamp of that shape.

    Raises:
        ValueError: the digits match the pattern but do not form a valid
            date or time.
    """
    m = re.search(r"([0-9][0-9])/([0-9][0-9]) ([0-9][0-9]):([0-9][0-9])", str)
    if m is None:
        return None

    month, day, hour, minute = [int(field) for field in m.groups()]

    # Read the clock once so that year and month come from the same instant.
    today = datetime.datetime.today()
    year = today.year
    if today.month < month:
        year -= 1

    return datetime.datetime(year, month, day, hour, minute)

def query_sid(sid):
    """Report whether a story with the given sid is stored in the database.

    Args:
        sid: story identifier as stored in the ``sid`` column of the
            ``stories`` table.

    Returns:
        ``True`` when a matching row exists, ``False`` otherwise.
    """
    con = sqlite3.connect(DATABASE)
    try:
        cur = con.cursor()
        cur.execute("select sid from stories where sid = ?", (sid,))
        # Cursors have no len(); fetching one row is enough to test existence.
        found = cur.fetchone() is not None
        cur.close()
    finally:
        con.close()
    return found
    

def insert_story_info(otp, story_info):
    """Store one story and its tags in the database.

    The story row is inserted, or updated when a row with the same sid
    already exists.  The story's tags and title are then fetched through
    ``otp``; the tags replace the story's rows in ``topics`` and a non-empty
    fetched title overwrites the stored one.  A "." is written to stderr as
    a progress mark for each story.

    Args:
        otp: client object providing ``get_tags(path)`` and
            ``get_title(path)``.
        story_info: mapping with the keys ``"url"``, ``"title"``,
            ``"author"`` and ``"datetime"``.

    Returns:
        ``True`` when the story was stored, ``False`` when the URL carries
        no sid or the datetime text cannot be parsed.
    """
    # Validate the input before touching the database so that a rejected
    # story does not leave an open connection behind.
    rex_sid = re.compile(r"/magazine.sourceforge.jp/article.pl\?sid=([0-9/]*)$")
    m = rex_sid.search(story_info["url"])
    if not m:
        return False
    sid = m.group(1)

    dt = str2date(story_info["datetime"])
    if dt is None:
        return False

    si = {
        "sid": sid,
        "title": story_info["title"],
        "author": story_info["author"],
        "date": time.mktime(dt.timetuple()),
    }
    article_path = "/magazine.sourceforge.jp/article.pl?sid=" + sid

    con = sqlite3.connect(DATABASE)
    try:
        cur = con.cursor()
        try:
            cur.execute("""insert into stories ( sid, title, author, date )
                                        values ( :sid, :title, :author, :date )""",
                        si)
        except sqlite3.IntegrityError:
            # sid is unique: the story is already known, so refresh it.
            cur.execute("""update stories set title = :title,
                                              author = :author,
                                              date = :date
                                          where sid=:sid""",
                        si)

        tags = otp.get_tags(article_path)
        title = otp.get_title(article_path)

        sys.stderr.write(".")
        sys.stderr.flush()

        # Replace the stored tag set with the freshly fetched one.
        cur.execute("""delete from topics where sid=?""", (sid,))
        for tag in tags:
            cur.execute("""insert into topics ( sid, topic )
                                       values ( :sid, :topic )""",
                        dict(sid=sid, topic=tag))

        if title:
            cur.execute("""update stories set title = :title
                                          where sid = :sid""",
                        dict(sid=sid, title=title.strip()))

        con.commit()
        cur.close()
    finally:
        # Closing without a commit discards a partially written story.
        con.close()
    return True

# main proc.
def main(start, end):
    """Fetch story lists for the given index range and store every story.

    ``start`` and ``end`` are converted with ``int()``; list offsets run
    from ``start`` up to (not including) ``end`` in steps of 40.  A cookie
    file at ``./cookies.txt`` is loaded when possible, and an interactive
    login is performed when no cookie is available.
    """
    client = otptools2.otptools("./cookies.txt")
    try:
        client.load_cookie()
    except IOError:
        sys.stderr.write("cannot use cookie file. create.\n")

    if client.get_cookie() == "":
        login(client)
        client.save_cookie()

    collected = {}
    first = int(start)
    last = int(end)

    prepare_database()
    sys.stderr.write("retrieving lists...")

    for offset in range(first, last, 40):
        page = client.get_list(offset)
        collected = client.parse_list(page, collected)

        # NOTE(review): if parse_list accumulates into the dict it is given,
        # stories from earlier pages are stored again on every pass --
        # confirm against otptools2 before changing this.
        for key in collected:
            insert_story_info(client, collected[key])

    sys.stderr.write("done.\n")
#### end of functions

# Run the main procedure only when executed as a script; `start` and `end`
# are the module-level values taken from sys.argv near the top of the file.
if __name__ == "__main__":
    main(start, end)
