#!/usr/bin/python
# -*- coding: utf-8 -*-

import sys
import codecs
from getopt import getopt

from html2wiki import HtmlToWikiConverter

# Command-line synopsis; printed (via sys.exit) when fewer than two
# positional arguments are supplied.
usage = "%s [-d <divider>] [-b <imagelist>] <input.txt> <output_prefix>" % sys.argv[0]

# Defaults that the command-line options may override:
#   divider  -- text inserted between <output_prefix> and the chunk number (-d)
#   bloblist -- mapping loaded from the tab-separated file given with -b; it is
#               handed unchanged to HtmlToWikiConverter.convert()
divider = "_"
bloblist = {}
(optslist, args) = getopt(sys.argv[1:], "d:b:")
if len(args) < 2:
    sys.exit(usage)

for (k, v) in optslist:
    if k == "-d":
        # The option string and the usage text both advertise -d, so honour it.
        divider = v
    elif k == "-b":
        # Each non-blank line is "<key><TAB><file name>"; only the first tab
        # splits, so the file name itself may contain tabs.
        with open(v, "r") as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline) carry no entry.
                    continue
                (blob, fname) = line.split("\t", 1)
                bloblist[blob] = fname

fname_in = args[0]
output_prefix = args[1]

# Accumulate input lines until a line containing the break marker is seen,
# then convert the accumulated chunk and write it to
# <output_prefix><divider><cnt>.txt, numbering chunks from 1.
#
# NOTE(review): lines that follow the last break marker are never written
# out. That matches the original behaviour -- confirm it is intended.
cnt = 1
chunk_lines = []
with codecs.open(fname_in, "r", encoding="utf-8") as f_in:
    for line in f_in:
        chunk_lines.append(line)
        if '<slash type="break">' not in line:
            continue

        # A fresh converter is created for every chunk.
        c = HtmlToWikiConverter()
        out = c.convert("".join(chunk_lines), bloblist)

        # The input was decoded from UTF-8, so encode text output the same way
        # instead of relying on the implicit ASCII/locale codec of a plain
        # text-mode file. Output that is already bytes is written untouched.
        if not isinstance(out, bytes):
            out = out.encode("utf-8")
        with open(output_prefix + divider + str(cnt) + ".txt", "wb") as f_out:
            f_out.write(out)

        cnt += 1
        chunk_lines = []

