/*
 * Decompiled with CFR 0.152.
 */
package zephyr.kenkyusya.lajp;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import zephyr.util.ZephyrUtil;

/**
 * Command-line tool that lists the ASCII-letter words of a dictionary body
 * file which are neither known head words nor known morph/abbreviation
 * tokens.
 *
 * <p>Known words come from three places: a built-in list of abbreviations,
 * a comma-separated morph file, and the {@code <dt id="...">} attributes of
 * an HTML file. Unknown words are printed to standard output in sorted
 * order; progress counters go to standard error.
 */
public class CheckLatinWords {
    private static final String DT_ID_REGEXP = "<dt id=\"([^ \"\\[]+)";
    private static final Pattern DT_ID = Pattern.compile(DT_ID_REGEXP);
    private static final String WORD_REGEXP = "([a-zA-Z]+)";
    private static final Pattern WORD = Pattern.compile(WORD_REGEXP);

    /** Tokens that are always accepted, independent of the loaded files. */
    private static final String[] BUILTIN_MORPHS = {
        "I", "II", "III", "Gk", "adj", "adv", "sc", "dim",
        "alqm", "alqo", "alqd", "neut", "pref", "pp", "intr", "dep",
        "pf", "inf", "acc", "cf", "con", "abl", "freq", "prp",
        "inch", "pass", "indecl", "sg", "refl", "prep", "semi", "conj",
        "impers", "subj", "comp", "int", "pron", "fut", "superl", "bi",
        "tri", "card", "interrog", "relat", "ord", "distrib", "nom", "impr",
        "gerundiv", "intens", "indef", "us", "pers", "idem", "quisque", "lacio",
        "amb", "cu", "ejus", "pleo", "impf", "alqos", "trav", "paul",
        "deos", "um", "sup", "loc", "voc", "uterque", "tral", "quisquam",
        "Cic", "Ov",
    };

    /** Head words collected from the morph file and the HTML file. */
    private final HashSet<String> heads = new HashSet<String>();
    /** Accepted non-head tokens: the built-in list plus the morph file's first column. */
    private final HashSet<String> morphs = new HashSet<String>();

    /** Creates a checker whose morph set is pre-filled with the built-in tokens. */
    CheckLatinWords() {
        for (String morph : BUILTIN_MORPHS) {
            this.morphs.add(morph);
        }
    }

    /**
     * Reads the morph file (decoded as ASCII) and records, for every line,
     * the text before the first comma as a morph and the text between that
     * comma and the following space as a head word.
     *
     * <p>Lines without a comma are skipped and counted instead of aborting
     * the run; when no space follows the comma the head extends to the end
     * of the line.
     *
     * @param morphFile path of the morph file
     * @throws Exception if the file cannot be opened or read
     */
    private void loadMorph(String morphFile) throws Exception {
        int skipped = 0;
        try (FileInputStream in = new FileInputStream(new File(morphFile));
             BufferedReader br = new BufferedReader(new InputStreamReader(in, "ASCII"))) {
            String line;
            while ((line = br.readLine()) != null) {
                int comma = line.indexOf(',');
                if (comma < 0) {
                    // No "morph,head" separator: nothing to extract from this line.
                    ++skipped;
                    continue;
                }
                int space = line.indexOf(' ', comma + 1);
                int headEnd = space < 0 ? line.length() : space;
                this.morphs.add(line.substring(0, comma));
                this.heads.add(line.substring(comma + 1, headEnd));
            }
        }
        if (skipped > 0) {
            System.err.println("warning: skipped " + skipped + " line(s) without a comma in " + morphFile);
        }
        System.err.println("head=" + this.heads.size() + " entries, morphs=" + this.morphs.size() + " entries");
    }

    /**
     * Reads the HTML file (decoded as Windows-31J) and adds the first
     * {@code <dt id="...">} value found on each line to the head-word set.
     * The captured value stops at the first space, double quote or
     * {@code [} character.
     *
     * @param georgFile path of the HTML file
     * @throws Exception if the file cannot be opened or read
     */
    private void loadGeorgesLaDe(String georgFile) throws Exception {
        int before = this.heads.size();
        try (FileInputStream in = new FileInputStream(new File(georgFile));
             BufferedReader br = new BufferedReader(new InputStreamReader(in, "Windows-31J"))) {
            String line;
            while ((line = br.readLine()) != null) {
                Matcher m = DT_ID.matcher(line);
                if (m.find()) {
                    this.heads.add(m.group(1));
                }
            }
        }
        System.err.println("head=" + this.heads.size() + " entries, added " + (this.heads.size() - before) + " entries");
    }

    /**
     * Scans the body file (decoded as UTF-8) and prints every unknown word.
     *
     * <p>Each non-blank line is passed through {@code ZephyrUtil.dropMarks}
     * (presumably strips diacritical marks; confirm against that helper) and
     * split into runs of ASCII letters. A word is reported as unknown unless
     * it is a single letter, is immediately preceded by {@code '-'}, or is
     * contained in the head or morph sets.
     *
     * <p>For lines that do not start with a space, the first word is also
     * compared case-insensitively with the first word of the previous such
     * line; an out-of-order pair is reported on a {@code ###} line.
     *
     * @param bodyFile path of the body file
     * @throws Exception if the file cannot be opened or read
     */
    private void loadBody(String bodyFile) throws Exception {
        TreeSet<String> unknown = new TreeSet<String>();
        try (FileInputStream in = new FileInputStream(new File(bodyFile));
             BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
            String prevhead = "";
            String line;
            while ((line = br.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                // Safe: the line is non-blank, hence non-empty.
                boolean headLine = line.charAt(0) != ' ';
                String dropline = ZephyrUtil.dropMarks(line);
                Matcher m = WORD.matcher(dropline);
                boolean first = true;
                while (m.find()) {
                    String word = m.group(1);
                    if (headLine && first) {
                        if (word.compareToIgnoreCase(prevhead) < 0) {
                            System.out.println("###\tprev=" + prevhead + ", cur=" + word);
                        }
                        prevhead = word;
                        first = false;
                    }
                    if (word.length() <= 1) {
                        continue;
                    }
                    int start = m.start(1);
                    if (start > 0 && dropline.charAt(start - 1) == '-') {
                        // Words glued to a preceding hyphen are not checked.
                        continue;
                    }
                    if (this.heads.contains(word) || this.morphs.contains(word)) {
                        continue;
                    }
                    unknown.add(word);
                }
            }
        }
        for (String w : unknown) {
            System.out.println(w);
        }
    }

    /**
     * Entry point. Expects three arguments: the body file, the morph file
     * and the HTML file, in that order. The morph and HTML files are loaded
     * first, then the body file is checked against them.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        if (args.length < 3) {
            System.err.println("Usage: java CheckLatinWords kenkyusya-lajp.txt whitaker.morph.txt georges-ldhd-body.html");
            return;
        }
        CheckLatinWords app = new CheckLatinWords();
        try {
            app.loadMorph(args[1]);
            app.loadGeorgesLaDe(args[2]);
            app.loadBody(args[0]);
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }
}

