package jp.ac.dendai.cdl.mori.wikie;

import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Command-line entry point that runs a MapReduce job through a pluggable
 * {@link MapReduceDriver}; also declares the public constants below.
 *
 * <p>The driver class is selected with {@code -d <fully qualified class name>}.
 * The driver parses the remaining arguments, configures the job and executes it.
 */
public class WikIE extends Configured implements Tool {

    /**
     * File listing the language prefixes.
     */
    public static final String LANG_DAT = "wiki_lang.dat";
    /**
     * File listing the Wikipedia project prefixes.
     */
    public static final String PROJECT_DAT = "wiki_project.dat";

    // NOTE(review): presumably lists the namespace names used for redirects; confirm against the readers of this file.
    public static final String REDIRECT_NAMESPACE_DAT = "namespace_redirect.dat";

    // String labels; not used inside this class.
    public static final String ENTRY = "entry";
    public static final String LEAF = "leaf";
    public static final String NODE = "node";
    public static final String EDGE = "edge";
    public static final String OTHER = "other";
    public static final String TARGET = "target";
    public static final String REDIRECT = "redirect";
    public static final String TEMPLATE = "template";
    public static final String HYPERNYM = "hypernym";

    // Charset name.
    public static final String UTF8 = "UTF8";

    // Element names; presumably those of the MediaWiki XML dump format (TODO confirm).
    public static final String MEDIAWIKI_ELEMENT = "mediawiki";
    public static final String BASE_ELEMENT = "base";
    public static final String NAMESPACES_ELEMENT = "namespaces";
    public static final String NAMESPACE_ELEMENT = "namespace";
    public static final String PAGE_ELEMENT = "page";
    public static final String TITLE_ELEMENT = "title";
    public static final String ID_ELEMENT = "id";
    public static final String REVISION_ELEMENT = "revision";
    public static final String TIMESTAMP_ELEMENT = "timestamp";
    public static final String CONTRIBUTOR_ELEMENT = "contributor";
    public static final String USERNAME_ELEMENT = "username";
    public static final String IP_ELEMENT = "ip";
    public static final String TEXT_ELEMENT = "text";

    // Attribute name.
    public static final String KEY_ATTRIBUTE = "key";

    // Integer kind codes; they parallel the LEAF/NODE/REDIRECT/HYPERNYM labels above.
    public static final int LEAF_KIND = 1;
    public static final int NODE_KIND = 2;
    public static final int REDIRECT_KIND = 3;
    public static final int HYPERNYM_KIND = 4;

    // Namespace numbers; presumably MediaWiki namespace ids (TODO confirm).
    // NOTE(review): these are not final. Left mutable because code outside this
    // file may assign them; confirm before turning them into constants.
    public static int ARTICLE_NS_NUM = 0;
    public static int IMAGE_NS_NUM = 6;
    public static int CATEGORY_NS_NUM = 14;
    public static int TEMPLATE_NS_NUM = 10;

    // Property keys; presumably job configuration keys (not read in this class).
    public static final String PROP_START_TAG = "wikie.io.startTag";
    public static final String PROP_END_TAG = "wikie.io.endTag";
    public static final String PROP_RESOURCE = "wikie.map.resource";
    public static final String PROP_FUNC = "wikie.job.func";
    public static final String PROP_ISBN_HEADER = "wikie.map.isbnheader";
    public static final String PROP_PROJECT = "wikie.project";
    public static final String PROP_LANG = "wikie.lang";

    // Command-line option names.
    public static final String OPT_F = "f";
    public static final String OPT_I = "i";
    public static final String OPT_O = "o";
    public static final String OPT_M = "m";
    public static final String OPT_R = "r";

    /** Option name that carries the driver class name. */
    private static final String OPT_D = "d";
    /** Option name that carries the interwiki dump file. */
    private static final String OPT_INTERWIKI = "interwiki";
    /** Raw argument token that precedes the driver class name. */
    private static final String DRIVER_FLAG = "-" + OPT_D;


    /**
     * Runs this tool through {@link ToolRunner} and exits the JVM with the
     * resulting status, so that a failed job is visible to the calling shell.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        int exitCode;
        try {
            exitCode = ToolRunner.run(new WikIE(), args);
        } catch (Exception e) {
            e.printStackTrace();
            // Any exception escaping the tool is reported as a failure.
            exitCode = 1;
        }
        System.exit(exitCode);
    }

    /**
     * Instantiates the driver named by {@code -d}, builds the job with the
     * input path ({@code -i}), output path ({@code -o}) and optional number of
     * reduce tasks ({@code -r}), then lets the driver configure and execute it.
     *
     * @param args command-line arguments
     * @return the value returned by the driver's {@code executeJob}
     * @throws IllegalArgumentException if no driver is specified with {@code -d}
     * @throws Exception if the driver cannot be created or the job setup or execution fails
     */
    @Override
    public int run(String[] args) throws Exception {
        MapReduceDriver driver = createDriverObject(args);
        if (driver == null) {
            throw new IllegalArgumentException(
                    "No MapReduce driver specified: pass " + DRIVER_FLAG + " <driver class name>");
        }
        CommandLine commandLine = driver.createCommandLine(args);

        // Build the job from the configuration injected by ToolRunner so that
        // generic options (e.g. -D key=value) take effect. getConf() is null
        // only when run() is invoked without setConf() having been called.
        Job job = (getConf() == null) ? new Job() : new Job(getConf());
        job.setJarByClass(WikIE.class);
        FileInputFormat.addInputPath(job, new Path(commandLine.getOptionValue(OPT_I)));
        FileOutputFormat.setOutputPath(job, new Path(commandLine.getOptionValue(OPT_O)));
        if (commandLine.hasOption(OPT_R)) {
            job.setNumReduceTasks(Integer.parseInt(commandLine.getOptionValue(OPT_R)));
        }

        driver.configJob(job, args);
        return driver.executeJob(job);
    }

    /**
     * Scans the raw arguments for the first {@code -d} token and instantiates
     * the class named by the argument that follows it, using its no-argument
     * constructor.
     *
     * @param args command-line arguments
     * @return the driver instance, or {@code null} if {@code -d} is not present
     * @throws IllegalArgumentException if {@code -d} is the last argument
     * @throws Exception if the class cannot be loaded or instantiated
     */
    public static MapReduceDriver createDriverObject(String[] args)
    throws Exception {
        for (int i = 0; i < args.length; i++) {
            if (DRIVER_FLAG.equals(args[i])) {
                if (i + 1 >= args.length) {
                    throw new IllegalArgumentException(
                            DRIVER_FLAG + " requires a driver class name");
                }
                return (MapReduceDriver) Class.forName(args[i + 1]).newInstance();
            }
        }
        return null;
    }

    /**
     * Parses the arguments against the options from
     * {@link #createDefaultOptions()}, with {@code stopAtNonOption} set to
     * {@code false}.
     *
     * @param args command-line arguments
     * @return the parsed command line
     * @throws ParseException if the parser rejects the arguments
     */
    public static CommandLine parseArgs(String[] args) throws ParseException {
        CommandLineParser parser = new BasicParser();
        return parser.parse(createDefaultOptions(), args, false);
    }

    /**
     * Creates the default option set: {@code i}, {@code o}, {@code d} and
     * {@code interwiki} are required, {@code r} is optional; every option
     * takes an argument.
     *
     * @return a new {@link Options} instance
     */
    public static Options createDefaultOptions() {
        Options options = new Options();
        options.addOption(createOption(OPT_I, true, true, "入力ファイル"));
        options.addOption(createOption(OPT_O, true, true, "出力ディレクトリ"));
        options.addOption(createOption(OPT_D, true, true, "MapReduceドライバ"));
        options.addOption(createOption(OPT_R, true, false, "Reduceタスク数"));
        options.addOption(createOption(OPT_INTERWIKI, true, true, "interwikiダンプファイル"));
        return options;
    }

    /**
     * Creates a single option and sets its required flag.
     *
     * @param opt option name
     * @param hasArgs whether the option takes an argument
     * @param required whether the option is marked as required
     * @param description description text of the option
     * @return the new option
     */
    public static Option createOption(String opt, boolean hasArgs, boolean required, String description) {
        Option option = new Option(opt, hasArgs, description);
        option.setRequired(required);
        return option;
    }
}
