/*
 * Decompiled with CFR 0.152.
 */
package zephyr.iwanami.koujien;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import zephyr.iwanami.koujien.DicBody;
import zephyr.iwanami.koujien.Util;
import zephyr.util.Counter;
import zephyr.util.DiffUtil;
import zephyr.util.EBWinMap;
import zephyr.util.MultiTreeMap;

/**
 * Merges the entry bodies of two dictionary editions (loaded from
 * {@code koujien5.html} and {@code koujien6.html}) into one HTML document
 * written to {@code System.out}, marking the differences inline:
 * <ul>
 *   <li>{@code {5 old}{6 new}} - text replaced between the editions,</li>
 *   <li>{@code {6- old}} - text only present in edition 5,</li>
 *   <li>{@code {6+ new}} - text only present in edition 6.</li>
 * </ul>
 * Statistics about the merge are written to {@code System.err}.
 *
 * <p>Instances keep mutable counters and scratch state and are not designed
 * for concurrent use.
 */
public class MergeBody {
    /** Anchor element with an href; group 1 is the target, group 2 the link text. */
    private static final Pattern A_HREF = Pattern.compile("<a href=\"(.+?)\">(.+?)</a>");

    // Private-use characters standing in for markup while the text is diffed,
    // so that a tag is always treated as one indivisible unit.
    // NOTE(review): these lie inside the range that afterDiff re-encodes as
    // gaiji entities; an input entity &#xE000;..&#xE008; would be mistaken for
    // a marker - presumably such codes never occur in the data.
    private static final char A_START = '\ue000';
    private static final char A_END = '\ue001';
    private static final char SUB_START = '\ue002';
    private static final char SUB_END = '\ue003';
    private static final char SUP_START = '\ue004';
    private static final char SUP_END = '\ue005';
    private static final char BR = '\ue006';
    private static final char B_START = '\ue007';
    private static final char B_END = '\ue008';

    private static final Pattern SUB = Pattern.compile("<sub>(.+?)</sub>");
    /** A "5 large kana / 6 small kana" replacement pair; group 1 is the small kana. */
    private static final Pattern KANADIFF = Pattern.compile("<b>\\{</b>5 [\u3042\u3044\u3046\u3048\u304a\u3064\u3084\u3086\u3088\u308f]<b>\\}</b><b>\\{</b>6 ([\u3041\u3043\u3045\u3047\u3049\u3063\u3083\u3085\u3087\u308e])<b>\\}</b>");
    /** One rendered diff marker; group 1 is the kind (5, 6, 6+, 6-), group 2 the text. */
    private static final Pattern DIFF56 = Pattern.compile("<b>\\{</b>([56][+\\-]?) (.+?)<b>\\}</b>");
    private static final Pattern KANJI_KANA = Pattern.compile("[\\u3400-\\u9F00]<b>\\{</b>6[\\+\\-] [\u3041-\u3093]<b>\\}</b>[\u3041-\u3093]{1,2}");
    private static final Pattern BR_HREF_ADD = Pattern.compile("<b>\\{</b>6\\+ <br><a[^>]+>.+?</a><b>\\}</b>");

    private static final Pattern ASCII_ONLY = Pattern.compile("[\\x00-\\x7F]+");
    private static final Pattern DIGITS = Pattern.compile("[0-9]+");
    private static final Pattern HANKAKU_OR_KA = Pattern.compile("[\\- -~\u30ab]");
    private static final Pattern ZENKAKU_OR_SMALL_KE = Pattern.compile("[\u2010\\uFF00-\\uFF5E\u30f5]");
    private static final Pattern HANKAKU = Pattern.compile("[\\- -~]");
    private static final Pattern ZENKAKU = Pattern.compile("[\u2010\\uFF00-\\uFF5E]");
    private static final Pattern GAIJI = Pattern.compile("[\\uA000-\\uF5FF]");
    private static final Pattern TAG = Pattern.compile("<[^>]+>");
    private static final Pattern LINK_LINE = Pattern.compile("<br>[&#x0-9A-F;]*<a [^>]+>.+?</a>");

    /** How far (in entries, both directions) findLemmaIdx looks for a lemma. */
    private static final int SEARCH_WINDOW = 1000;

    private final DicBody koujien5;
    private final DicBody koujien6;
    private final EBWinMap gaijiMap;
    /** Link targets collected by the most recent {@link #prepareDiff(String)} call, in order. */
    private final LinkedList<String> arefList = new LinkedList<>();
    private final Counter<String> kanjiKanaCounter = new Counter<>();
    private final DiffUtil diff = new DiffUtil();

    // Statistics reported on System.err at the end of merge().
    private int kan56fix = 0;
    private int nSub = 0;
    private int diff6drop = 0;
    private int diff6add = 0;
    private int diff56change = 0;
    private int kanjiKanaAdd = 0;
    private int brHrefAdd = 0;
    private int nHanZen = 0;
    private int nZenHan = 0;
    private int ndd = 0;
    private int nddchanged = 0;
    private int nhitotsu = 0;
    private int nbar5 = 0;
    private int nbar6 = 0;

    /**
     * Loads both edition bodies and the gaiji map from fixed file names.
     *
     * @throws Exception whatever the loaders throw
     */
    public MergeBody() throws Exception {
        this.koujien5 = new DicBody("koujien5.html");
        this.koujien6 = new DicBody("koujien6.html");
        this.gaijiMap = new EBWinMap("KOJIEN-gaiji.txt");
    }

    /**
     * Finds the index of {@code lemma} closest to {@code idx}, looking at most
     * {@link #SEARCH_WINDOW} - 1 entries in either direction.
     *
     * @return the index found, or {@code idx} itself (after logging to
     *         System.err) when the lemma is not within the window
     */
    private int findLemmaIdx(String lemma, ArrayList<String> lemmas, int idx) {
        if (lemmas.get(idx).equals(lemma)) {
            return idx;
        }
        for (int i = 1; i < SEARCH_WINDOW; i++) {
            int k = idx + i;
            if (k < lemmas.size() && lemmas.get(k).equals(lemma)) {
                return k;
            }
            k = idx - i;
            if (k >= 0 && lemmas.get(k).equals(lemma)) {
                return k;
            }
        }
        System.err.println("cannot find " + lemma + ", idx=" + idx);
        return idx;
    }

    /** Removes every U+2010 (hyphen) from a reading. */
    private String normKana(String kana) {
        return kana.replace("\u2010", "");
    }

    /** Strips a leading U+25CB (white circle) from a headword spelling; null-safe. */
    private String normHyoki(String hyoki) {
        if (hyoki != null && hyoki.startsWith("\u25cb")) {
            return hyoki.substring(1);
        }
        return hyoki;
    }

    /** Returns {@code txt} without the characters in U+3041..U+30FE (hiragana and katakana blocks). */
    private String dropKana(String txt) {
        StringBuilder sb = new StringBuilder(txt.length());
        for (int i = 0; i < txt.length(); i++) {
            char c = txt.charAt(i);
            if (c < '\u3041' || '\u30fe' < c) {
                sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Describes, position by position, how two equally long readings differ.
     *
     * @param hyoki headword spelling; when it is pure ASCII an extra
     *              "##eng." line summarising the change is appended
     * @param kana1 reading in edition 5
     * @param kana2 reading in edition 6
     * @return the description, or the empty string when the lengths differ
     */
    private String kanaDiff(String hyoki, String kana1, String kana2) {
        if (kana1.length() != kana2.length()) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < kana1.length(); i++) {
            char c1 = kana1.charAt(i);
            char c2 = kana2.charAt(i);
            if (c1 == c2) {
                continue;
            }
            if (sb.length() > 0) {
                sb.append(", ");
            }
            // Katakana code points are the matching hiragana + 0x60 (96).
            int d = c1 - c2;
            if (d == 96) {
                sb.append(String.format("\u7247%c\u2192\u5e73%c", c1, c2));
            } else if (d == -96) {
                sb.append(String.format("\u5e73%c\u2192\u7247%c", c1, c2));
            } else {
                sb.append(String.format("%c(x%X)\u2192%c(x%X)", c1, (int) c1, c2, (int) c2));
            }
        }
        if (hyoki != null && ASCII_ONLY.matcher(hyoki).matches()) {
            String changes = sb.toString();
            sb.append("\n\t\t##eng. " + hyoki + " " + kana1 + "\u2192" + kana2 + " [" + changes + "]");
        }
        return sb.toString();
    }

    /**
     * Parses exactly four hex digits of {@code s} starting at {@code from}.
     *
     * @return the value, or -1 if any of the four characters is not a hex digit
     */
    private static int parseHex4(String s, int from) {
        int value = 0;
        for (int i = from; i < from + 4; i++) {
            int digit = Character.digit(s.charAt(i), 16);
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }

    /**
     * Converts an HTML entry body into the flat text that is diffed:
     * four-digit {@code &#xHHHH;} entities become the character itself,
     * {@code <a href>} elements become A_START + link text + A_END, and
     * {@code <sub>}, {@code <sup>}, {@code <br>}, {@code <b>} tags become single
     * marker characters. Link targets are deliberately kept out of the text and
     * stored, in order, in {@link #arefList} (which is cleared first).
     * Anything not recognised is copied through unchanged.
     */
    private String prepareDiff(String line) {
        this.arefList.clear();
        int len = line.length();
        StringBuilder sb = new StringBuilder(len);
        int i = 0;
        while (i < len) {
            // Only the exact form "&#xHHHH;" is decoded; anything else stays literal.
            if (line.startsWith("&#x", i) && i + 7 < len && line.charAt(i + 7) == ';') {
                int code = parseHex4(line, i + 3);
                if (code >= 0) {
                    sb.append((char) code);
                    i += 8;
                    continue;
                }
            }
            if (line.startsWith("<a ", i)) {
                Matcher m = A_HREF.matcher(line);
                m.region(i, len);
                // lookingAt: the anchor must start right here, never further on.
                if (m.lookingAt()) {
                    this.arefList.add(m.group(1));
                    sb.append(A_START).append(m.group(2)).append(A_END);
                    i = m.end();
                    continue;
                }
            }
            if (line.startsWith("<sub>", i)) {
                sb.append(SUB_START);
                i += 5;
                continue;
            }
            if (line.startsWith("<sup>", i)) {
                sb.append(SUP_START);
                i += 5;
                continue;
            }
            if (line.startsWith("</sub>", i)) {
                sb.append(SUB_END);
                i += 6;
                continue;
            }
            if (line.startsWith("</sup>", i)) {
                sb.append(SUP_END);
                i += 6;
                continue;
            }
            if (line.startsWith("<br>", i)) {
                sb.append(BR);
                i += 4;
                continue;
            }
            if (line.startsWith("<b>", i)) {
                sb.append(B_START);
                i += 3;
                continue;
            }
            if (line.startsWith("</b>", i)) {
                sb.append(B_END);
                i += 4;
                continue;
            }
            sb.append(line.charAt(i));
            ++i;
        }
        return sb.toString();
    }

    /** Same as {@link #afterDiff(String, LinkedList)} using {@link #arefList} as the href source. */
    private String afterDiff(String line) {
        return this.afterDiff(line, this.arefList);
    }

    /**
     * Inverse of {@link #prepareDiff(String)}: expands marker characters back
     * into tags and re-encodes characters in U+A000..U+F5FF as
     * {@code &#xHHHH;} entities.
     *
     * @param line  flat text, possibly containing marker characters
     * @param hrefs link targets; one is consumed per A_START marker (a missing
     *              target renders as the text "null")
     */
    private String afterDiff(String line, LinkedList<String> hrefs) {
        StringBuilder sb = new StringBuilder(line.length() + 16);
        for (int i = 0; i < line.length(); i++) {
            char code = line.charAt(i);
            switch (code) {
                case A_START:
                    sb.append("<a href=\"");
                    sb.append(hrefs.poll());
                    sb.append("\">");
                    break;
                case A_END:
                    sb.append("</a>");
                    break;
                case SUB_START:
                    sb.append("<sub>");
                    break;
                case SUB_END:
                    sb.append("</sub>");
                    break;
                case SUP_START:
                    sb.append("<sup>");
                    break;
                case SUP_END:
                    sb.append("</sup>");
                    break;
                case BR:
                    sb.append("<br>");
                    break;
                case B_START:
                    sb.append("<b>");
                    break;
                case B_END:
                    sb.append("</b>");
                    break;
                default:
                    if ('\ua000' <= code && code <= '\uf5ff') {
                        // %X only accepts integral arguments; a boxed Character
                        // would raise IllegalFormatConversionException.
                        sb.append(String.format("&#x%X;", (int) code));
                    } else {
                        sb.append(code);
                    }
                    break;
            }
        }
        return sb.toString();
    }

    /**
     * Copies {@code sub} to {@code sb}, replacing every {@link #KANADIFF}
     * marker pair with just the edition-6 small kana and counting each
     * replacement in {@code kan56fix}.
     */
    private void fixKanaDiff1(StringBuilder sb, String sub) {
        int idx = 0;
        Matcher m = KANADIFF.matcher(sub);
        while (m.find()) {
            if (idx < m.start()) {
                sb.append(sub, idx, m.start());
            }
            sb.append(m.group(1));
            idx = m.end();
            ++this.kan56fix;
        }
        if (idx < sub.length()) {
            sb.append(sub.substring(idx));
        }
    }

    /**
     * Applies {@link #fixKanaDiff1} to the content of every {@code <sub>}
     * element of a rendered diff line; text outside those elements is copied
     * unchanged. Each {@code <sub>} element increments {@code nSub}.
     */
    private String dropDiffInSub(String diffline) {
        StringBuilder sb = new StringBuilder(diffline.length());
        int idx = 0;
        Matcher m = SUB.matcher(diffline);
        while (m.find()) {
            if (idx < m.start(1)) {
                sb.append(diffline, idx, m.start(1));
            }
            this.fixKanaDiff1(sb, m.group(1));
            idx = m.end(1);
            ++this.nSub;
        }
        if (idx < diffline.length()) {
            sb.append(diffline.substring(idx));
        }
        return sb.toString();
    }

    /**
     * Counts the diff markers in a rendered line and updates the global
     * drop/add/change counters.
     *
     * @return a two-element array: [0] number of "6-", "6+" and "6" markers
     *         ("5" markers are not counted since each is paired with a "6"),
     *         [1] total length of the text inside all markers, "5" included
     */
    private int[] diffCount(String diffline) {
        Matcher m = DIFF56.matcher(diffline);
        int ndiff = 0;
        int difflen = 0;
        while (m.find()) {
            switch (m.group(1)) {
                case "6-":
                    ++this.diff6drop;
                    ++ndiff;
                    break;
                case "6+":
                    ++this.diff6add;
                    ++ndiff;
                    break;
                case "6":
                    ++this.diff56change;
                    ++ndiff;
                    break;
                default:
                    break;
            }
            difflen += m.group(2).length();
        }
        return new int[] {ndiff, difflen};
    }

    /** Number of {@code <br>}-separated pieces, ignoring pieces that consist of a link. */
    private int lineCount(String line) {
        String withoutLinks = LINK_LINE.matcher(line).replaceAll("");
        return withoutLinks.split("<br>").length;
    }

    /** Line-count change from {@code line1} to {@code line2} (positive when {@code line2} has more). */
    private int diffLineCount(String line1, String line2) {
        return this.lineCount(line2) - this.lineCount(line1);
    }

    /** Records every {@link #KANJI_KANA} match of the line in the counter and in {@code kanjiKanaAdd}. */
    private void kanjiKanaAddCount(String diffline) {
        Matcher m = KANJI_KANA.matcher(diffline);
        while (m.find()) {
            this.kanjiKanaCounter.add(m.group());
            ++this.kanjiKanaAdd;
        }
    }

    /** Counts the {@link #BR_HREF_ADD} matches of the line in {@code brHrefAdd}. */
    private void brHrefAddCount(String diffline) {
        Matcher m = BR_HREF_ADD.matcher(diffline);
        while (m.find()) {
            ++this.brHrefAdd;
        }
    }

    /**
     * Gives every diff segment the link targets belonging to the A_START
     * markers it contains: DELETE segments take them from edition 5, INSERT
     * segments from edition 6, EQUAL segments from edition 6 (the matching
     * edition-5 target is consumed and discarded).
     *
     * <p>Assumes the usual diff invariant that EQUAL+DELETE segments
     * concatenate to the first text and EQUAL+INSERT segments to the second.
     *
     * @return one queue per segment, index-aligned with {@code diffs}
     */
    private static ArrayList<LinkedList<String>> assignHrefs(ArrayList<DiffUtil.Diff> diffs,
            LinkedList<String> hrefs5, LinkedList<String> hrefs6) {
        ArrayList<LinkedList<String>> owned = new ArrayList<>(diffs.size());
        for (DiffUtil.Diff d : diffs) {
            LinkedList<String> mine = new LinkedList<>();
            for (int i = 0; i < d.text.length(); i++) {
                if (d.text.charAt(i) != A_START) {
                    continue;
                }
                switch (d.operation) {
                    case DELETE:
                        mine.add(hrefs5.poll());
                        break;
                    case INSERT:
                        mine.add(hrefs6.poll());
                        break;
                    default:
                        hrefs5.poll();
                        mine.add(hrefs6.poll());
                        break;
                }
            }
            owned.add(mine);
        }
        return owned;
    }

    /** True when both texts are single gaiji characters that the gaiji map resolves to the same value. */
    private boolean sameGaiji(String text5, String text6) {
        if (!GAIJI.matcher(text6).matches() || !GAIJI.matcher(text5).matches()) {
            return false;
        }
        // Cast to int: %X rejects a boxed Character.
        String gaiji1 = this.gaijiMap.replace(String.format("%04X", (int) text5.charAt(0)));
        String gaiji2 = this.gaijiMap.replace(String.format("%04X", (int) text6.charAt(0)));
        return gaiji1 != null && gaiji1.equals(gaiji2);
    }

    /**
     * Removes differences that are not considered real changes. Segments whose
     * text is only whitespace are dropped. A DELETE/INSERT pair collapses into a
     * single EQUAL segment (keeping the edition-6 text) when it is: a change of
     * digits of equal length, a half-width/full-width swap, the same gaiji under
     * a different code, or a U+2015 bar in edition 5. When edition 6 inserted a
     * U+2015 bar, the edition-5 text is kept as EQUAL instead. An inserted
     * U+3064 after an EQUAL segment ending in U+4E00 is also accepted as EQUAL.
     *
     * <p>{@code hrefs} is kept index-aligned with {@code diffs}.
     */
    private void normalizeDiffs(ArrayList<DiffUtil.Diff> diffs, ArrayList<LinkedList<String>> hrefs) {
        int idx = 0;
        // Invariant: prev is the element at idx - 1 (null while idx == 0).
        DiffUtil.Diff prev = null;
        while (idx < diffs.size()) {
            DiffUtil.Diff d = diffs.get(idx);
            if (d.text.trim().isEmpty()) {
                diffs.remove(idx);
                hrefs.remove(idx);
                continue;
            }
            if (d.text.equals("\u3064") && d.operation == DiffUtil.Operation.INSERT && prev != null
                    && prev.text.endsWith("\u4e00") && prev.operation == DiffUtil.Operation.EQUAL) {
                d.operation = DiffUtil.Operation.EQUAL;
                ++this.nhitotsu;
                prev = d;
                ++idx;
                continue;
            }
            if (d.operation == DiffUtil.Operation.INSERT && prev != null
                    && prev.operation == DiffUtil.Operation.DELETE) {
                boolean keepNewOnly = false;
                boolean sameLength = d.text.length() == prev.text.length();
                if (sameLength && (DIGITS.matcher(d.text).matches() || DIGITS.matcher(prev.text).matches())) {
                    keepNewOnly = true;
                } else if (HANKAKU_OR_KA.matcher(d.text).matches()
                        && ZENKAKU_OR_SMALL_KE.matcher(prev.text).matches()) {
                    keepNewOnly = true;
                    ++this.nHanZen;
                } else if (HANKAKU.matcher(prev.text).matches() && ZENKAKU.matcher(d.text).matches()) {
                    keepNewOnly = true;
                    ++this.nZenHan;
                } else if (this.sameGaiji(prev.text, d.text)) {
                    keepNewOnly = true;
                } else if (prev.text.equals("\u2015")) {
                    keepNewOnly = true;
                    ++this.nbar5;
                }
                if (keepNewOnly) {
                    diffs.remove(idx - 1);
                    hrefs.remove(idx - 1);
                    d.operation = DiffUtil.Operation.EQUAL;
                    // d now sits at idx - 1. Without this, prev would keep
                    // pointing at the removed DELETE and could trigger a
                    // second, wrong merge on the next element.
                    prev = d;
                    continue;
                }
                if (d.text.equals("\u2015")) {
                    diffs.remove(idx);
                    hrefs.remove(idx);
                    prev.operation = DiffUtil.Operation.EQUAL;
                    ++this.nbar6;
                    continue;
                }
            }
            prev = d;
            ++idx;
        }
    }

    /**
     * Diffs two entry bodies and renders the result as HTML with inline
     * {@code {5 ..}}, {@code {6 ..}}, {@code {6+ ..}} and {@code {6- ..}} markers.
     *
     * @param dd1 entry body from edition 5
     * @param dd2 entry body from edition 6
     * @return {@code dd1} when both are identical, {@code dd2} when all
     *         differences were normalised away, otherwise the marked-up text
     */
    private String diffDD(String dd1, String dd2) {
        ++this.ndd;
        if (dd1.equals(dd2)) {
            return dd1;
        }
        // prepareDiff overwrites arefList on every call, so the targets of
        // each edition are snapshotted right after its own call.
        String dd1x = this.prepareDiff(dd1);
        LinkedList<String> hrefs5 = new LinkedList<>(this.arefList);
        String dd2x = this.prepareDiff(dd2);
        LinkedList<String> hrefs6 = new LinkedList<>(this.arefList);

        // Copied into an ArrayList for cheap indexed access and removal.
        ArrayList<DiffUtil.Diff> diffs = new ArrayList<>(this.diff.diff_main(dd1x, dd2x));
        ArrayList<LinkedList<String>> hrefs = assignHrefs(diffs, hrefs5, hrefs6);
        this.normalizeDiffs(diffs, hrefs);

        boolean allEqual = true;
        for (DiffUtil.Diff d : diffs) {
            if (d.operation != DiffUtil.Operation.EQUAL) {
                allEqual = false;
                break;
            }
        }
        if (allEqual) {
            return dd2;
        }

        StringBuilder sb = new StringBuilder();
        DiffUtil.Operation prevOpe = null;
        for (int i = 0; i < diffs.size(); i++) {
            DiffUtil.Diff d = diffs.get(i);
            // Rendered per segment so a deleted anchor takes its target from
            // edition 5 instead of stealing one from edition 6.
            String html = this.afterDiff(d.text, hrefs.get(i));
            switch (d.operation) {
                case EQUAL:
                    sb.append(html);
                    break;
                case DELETE: {
                    boolean replaced = i + 1 < diffs.size()
                            && diffs.get(i + 1).operation == DiffUtil.Operation.INSERT;
                    sb.append(replaced ? "<b>{</b>5 " : "<b>{</b>6- ");
                    sb.append(html);
                    sb.append("<b>}</b>");
                    break;
                }
                case INSERT:
                    sb.append(prevOpe == DiffUtil.Operation.DELETE ? "<b>{</b>6 " : "<b>{</b>6+ ");
                    sb.append(html);
                    sb.append("<b>}</b>");
                    break;
                default:
                    break;
            }
            prevOpe = d.operation;
        }
        String diffline = this.dropDiffInSub(sb.toString());
        this.kanjiKanaAddCount(diffline);
        this.brHrefAddCount(diffline);
        return diffline;
    }

    /** Length of the line with all tags removed. */
    private int normlen(String line) {
        return TAG.matcher(line).replaceAll("").length();
    }

    /**
     * Dumps to System.err, grouped by leading character, entries of the
     * kanji+kana counter whose count key is at least 10. For each character
     * only the first matching entry is printed.
     * NOTE(review): the early exit on {@code k < 10} presumes the sorted map
     * iterates from the highest count down - confirm against MultiTreeMap.
     */
    private void dumpKanjiKanaMap() {
        HashMap<String, Integer> kanaMap = this.kanjiKanaCounter.map();
        MultiTreeMap<Integer, String> kanaSorted = this.kanjiKanaCounter.sort();
        Counter<Character> topMap = new Counter<>();
        for (String key : kanaMap.keySet()) {
            topMap.add(key.charAt(0));
        }
        MultiTreeMap<Integer, Character> sortedTop = topMap.sort();
        for (Integer n : sortedTop.keySet()) {
            char prevc = ' ';
            for (Character boxed : sortedTop.getList(n)) {
                char c = boxed;
                for (Integer k : kanaSorted.keySet()) {
                    if (k < 10) {
                        break;
                    }
                    for (String txt : kanaSorted.getList(k)) {
                        if (txt.charAt(0) != c || prevc == c) {
                            continue;
                        }
                        System.err.println(c);
                        prevc = c;
                        System.err.println("\t" + k + "\t" + TAG.matcher(txt).replaceAll(""));
                    }
                }
            }
        }
    }

    /**
     * Walks both lemma lists in parallel and prints the merged HTML document
     * to System.out, followed by statistics on System.err.
     *
     * <p>NOTE(review): the walk stops as soon as either list is exhausted, so
     * trailing entries of the longer list are not emitted - confirm this is
     * intended.
     */
    private void merge() throws IOException {
        System.out.println("<html><body>");
        ArrayList<String> lemmas5 = this.koujien5.getLemmas();
        ArrayList<String> lemmas6 = this.koujien6.getLemmas();
        ArrayList<String> dd5 = this.koujien5.getDD();
        ArrayList<String> dd6 = this.koujien6.getDD();
        int idx5 = 0;
        int idx6 = 0;
        int n = 0;
        int del6 = 0;
        int add6 = 0;
        // Edition-6 lemmas already reported as the target of a renamed edition-5 entry.
        HashSet<String> ignore = new HashSet<String>();
        while (idx5 < lemmas5.size() && idx6 < lemmas6.size()) {
            String l5 = lemmas5.get(idx5);
            String l6 = lemmas6.get(idx6);
            ++n;

            // Case 1: the edition-5 lemma does not exist in edition 6.
            if (!this.koujien6.hasLemma(l5)) {
                Util.Lemma l5x = new Util.Lemma(l5);
                if (l5x.hyoki != null && l5x.hyoki.startsWith("\u25cb")) {
                    String l6hyoki = l5x.hyoki.substring(1);
                    String v = this.koujien6.getKana(l6hyoki);
                    if (v != null) {
                        String l6lemma = v + "\u3010" + l6hyoki + "\u3011";
                        System.out.println("<dt>" + l5 + "</dt><dd><p>\u21926 " + l6lemma + "</p><p>[5] " + dd5.get(idx5) + "</p></dd>");
                        ignore.add(l6lemma);
                        ++idx5;
                        continue;
                    }
                }
                if (this.koujien6.getHyoki(l5x.kana) == null && this.koujien6.getKana(l5x.hyoki) == null) {
                    System.out.println("<dt title=\"" + l5 + " \u25bc6\u7248\u524a\u9664\">" + l5 + "</dt><dd><p><b>6\u7248\u524a\u9664</b></p><p>" + dd5.get(idx5) + "</p></dd>");
                    ++del6;
                }
                ++idx5;
                continue;
            }

            // Case 2: the edition-6 lemma does not exist in edition 5.
            if (!this.koujien5.hasLemma(l6)) {
                Util.Lemma l6x = new Util.Lemma(l6);
                String v = this.koujien5.getHyoki(l6x.kana);
                if (v != null) {
                    if (this.koujien6.getHyoki(l6x.kana) != null) {
                        System.out.println("<dt>" + l6 + "</dt><dd><p>" + l6 + "\u21905\u3010" + v + "\u3011</p><p>[6] " + dd6.get(idx6) + "</p></dd>");
                    } else {
                        System.out.println("<dt title=\"" + l6 + " - 6\u7248\u8ffd\u52a0?\">" + l6 + "</dt><dd><p><b>6\u7248\u8ffd\u52a0?</b></p><p>" + dd6.get(idx6) + "</p></dd>");
                    }
                } else {
                    v = this.koujien5.getKana(l6x.hyoki);
                    if (v != null) {
                        if (this.normKana(v).equals(this.normKana(l6x.kana))) {
                            System.out.println("<dt>" + l6 + "</dt><dd><p>\u21905\uff08" + v + "\uff09</p><p>[6] " + dd6.get(idx6) + "</p></dd>");
                        } else {
                            System.out.println("<dt>" + l6 + "</dt><dd><p>\u21905\uff08" + v + "\uff09" + this.kanaDiff(l6x.hyoki, v, l6x.kana) + "</p><p>[6] " + dd6.get(idx6) + "</p></dd>");
                        }
                    } else if (!ignore.contains(l6)) {
                        System.out.println("<dt title=\"" + l6 + " \u26056\u7248\u8ffd\u52a0\">" + l6 + "</dt><dd><p><b>6\u7248\u8ffd\u52a0</b></p><p>" + dd6.get(idx6) + "</p></dd>");
                    }
                }
                ++add6;
                ++idx6;
                continue;
            }

            // Case 3: the lemma exists in both editions - diff the bodies.
            idx5 = this.findLemmaIdx(l6, lemmas5, idx5);
            String line5 = dd5.get(idx5);
            String line6 = dd6.get(idx6);
            String diffHtml = this.diffDD(line5, line6);
            int[] ndiff = this.diffCount(diffHtml);
            if (ndiff[0] > 0) {
                ++this.nddchanged;
                System.out.print("<dt>" + l6 + " *" + ndiff[0]);
            } else {
                System.out.print("<dt>" + l6);
            }
            int diffLine = this.diffLineCount(line5, line6);
            if (diffLine > 0) {
                System.out.print(" L+" + diffLine);
            } else if (diffLine < 0) {
                System.out.print(" L" + diffLine);
            }
            System.out.print("</dt><dd>");
            int len5 = this.normlen(line5);
            int len6 = this.normlen(line6);
            // len6 > 0 guards the percentage against division by zero when the
            // edition-6 body has no visible text.
            if (ndiff[0] > 3 && len6 > 0 && 100 * ndiff[1] / len6 > 50) {
                // Heavily changed: show the diff and both originals.
                System.out.println("<p>" + diffHtml + "</p>");
                System.out.println("<p>- - - - - -</p>");
                System.out.println("<p>[5] " + line5 + "</p>");
                System.out.println("<p>[6] " + line6 + "</p></dd>");
            } else if (len5 > 100 && len5 > 2 * len6 || len6 > 100 && len6 > 2 * len5) {
                // One side is more than twice as long: show both originals only.
                System.out.println("<p>[5] " + line5 + "</p>");
                System.out.println("<p>[6] " + line6 + "</p></dd>");
            } else {
                System.out.println("<p>" + diffHtml + "</p></dd>");
            }
            ++idx5;
            ++idx6;
        }
        System.out.println("</body></html>");
        System.err.println("made " + n + " lemmas, del6=" + del6 + ", add6=" + add6);
        System.err.println("ndd=" + this.ndd + ", nddchanged=" + this.nddchanged);
        System.err.println("diff6drop=" + this.diff6drop + ", diff6add=" + this.diff6add + ", diff56change=" + this.diff56change);
        System.err.println("nHanZen=" + this.nHanZen + ", nZenHan=" + this.nZenHan + ", nhitotsu=" + this.nhitotsu + ", nSub=" + this.nSub + ", kana56fix=" + this.kan56fix + ", kanjiKanaAdd=" + this.kanjiKanaAdd + ", nbar5=" + this.nbar5 + ", nbar6=" + this.nbar6 + ", brHrefAdd=" + this.brHrefAdd);
        this.dumpKanjiKanaMap();
    }

    /** Entry point: loads both editions and prints the merged document. */
    public static void main(String[] args) throws Exception {
        MergeBody app = new MergeBody();
        app.merge();
    }
}

