#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import urllib2
import codecs
import getpass
import urllib
import re
import os.path

from poster.encode import multipart_encode
from poster.streaminghttp import register_openers
import sfjp
from html2wiki import HtmlToWikiConverter

def regularize_url(text):
    """Return *text* with each space, slash and plus sign replaced by "_"."""
    for unsafe_char in (" ", "/", "+"):
        text = text.replace(unsafe_char, "_")
    return text

def read_text(target_file, bloblist):
    """Read an HTML-style text file and convert it into wiki-style pages.

    The file is decoded as UTF-8.  Its first line, with trailing whitespace
    removed, is the title.  The remaining lines are grouped into pages: a
    page ends with (and includes) the first line containing the marker
    '<slash type="break">'.  Every page is converted by a fresh
    HtmlToWikiConverter, which also receives *bloblist*.

    Whatever follows the last marker (possibly nothing at all) is always
    converted as a final page, so the returned list is never empty.

    return value: tuple (title, list of wiki-style text)
    """
    paged_text = []
    # Collect the lines of the current page and join them once per page
    # instead of growing a string line by line.
    page_lines = []
    # The context manager closes the file even when decoding or the
    # converter raises part-way through.
    with codecs.open(target_file, "r", encoding="utf-8") as f:
        title = f.readline().rstrip()
        for line in f:
            page_lines.append(line)
            if '<slash type="break">' in line:
                converter = HtmlToWikiConverter()
                paged_text.append(
                    converter.convert("".join(page_lines), bloblist))
                page_lines = []

    # Trailing text after the last page break (or the whole body when the
    # file contains no break marker).
    converter = HtmlToWikiConverter()
    paged_text.append(converter.convert("".join(page_lines), bloblist))
    return (title, paged_text)

def main():
    """Upload a converted page and/or its image attachments to the wiki.

    Command line: <mode> <projectname> <input.txt> <imagelist>

    * mode        -- "all", "page" or "image".  "pdf" and "single" are also
                     accepted, but this function performs no action for
                     them.  Any other value prints the usage and exits.
    * projectname -- project name passed to the wiki calls.
    * input.txt   -- HTML-style source text; its first line is the title.
    * imagelist   -- text file with one "<blob><TAB><filename>" entry per
                     line; blank lines are ignored.

    Only the first converted page (pages[0]) is uploaded.  Attachments are
    the first arguments of the "[[Thumb(" macros found in that page; names
    that do not exist as local files are reported and skipped.

    The user name and password are read interactively.
    """
    usage = "%s {all|page|image} <projectname> <input.txt> <imagelist>" % sys.argv[0]
    try:
        mode = sys.argv[1]
        project_name = sys.argv[2]
        target_file = sys.argv[3]
        imagelist = sys.argv[4]
    except IndexError:
        sys.exit(usage)

    # Reject unknown modes up front instead of logging in and then silently
    # doing nothing.  "pdf" and "single" stay accepted for compatibility
    # even though nothing below acts on them.
    if mode not in ("all", "image", "pdf", "single", "page"):
        sys.exit(usage)
    upload_page = mode in ("all", "page")
    upload_image = mode in ("all", "image")

    # read bloblist
    bloblist = {}
    with open(imagelist, "r") as f:
        for lineno, l in enumerate(f, 1):
            entry = l.strip()
            if not entry:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            try:
                (blob, fname) = entry.split("\t", 1)
            except ValueError:
                sys.exit("%s:%d: expected '<blob><TAB><filename>', got %r"
                         % (imagelist, lineno, entry))
            bloblist[blob] = fname

    # read contents
    (title, pages) = read_text(target_file, bloblist)

    wiki = sfjp.Wiki()
    user = raw_input('user: ')
    passwd = getpass.getpass('password: ')
    wiki.login(user, passwd)

    # upload pages
    page_title = title
    # The page name is the title with " ", "/" and "+" replaced by "_",
    # then UTF-8 encoded and percent-quoted.
    page_name = regularize_url(title)
    page_name = urllib.quote(page_name.encode("utf-8"))

    # get attached images
    rex = re.compile(r"\[\[Thumb\(([^,)]*)")
    imgs = rex.findall(pages[0])

    print >> sys.stderr, "uploading to %s : %s (%d)" % (project_name, page_title, len(pages[0]))
    if upload_page:
        wiki.edit_page(project_name, page_name,
                       page_title.encode("utf-8"), pages[0])
    if upload_image:
        for fn in imgs:
            if os.path.exists(fn):
                print >> sys.stderr, "uploading attachment to %s : %s" % (project_name, fn)
                wiki.post_attachment(project_name, page_name, fn)
            else:
                print >> sys.stderr, "file not exists: %s. skip" % (fn)


# Run the uploader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
