/*
 * Decompiled with CFR 0.152.
 */
package org.basex.util.ft;

import java.lang.reflect.Constructor;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import org.basex.io.IOFile;
import org.basex.util.FTToken;
import org.basex.util.Prop;
import org.basex.util.Reflect;
import org.basex.util.Strings;
import org.basex.util.Token;
import org.basex.util.TokenBuilder;
import org.basex.util.Util;
import org.basex.util.ft.FTCase;
import org.basex.util.ft.FTFlag;
import org.basex.util.ft.FTOpt;
import org.basex.util.ft.FTSpan;
import org.basex.util.ft.Language;
import org.basex.util.ft.Tokenizer;
import org.basex.util.ft.WesternTokenizer;

final class JapaneseTokenizer
extends Tokenizer {
    /** Availability flag; cleared by the static initializer when Igo or its dictionary cannot be set up. */
    private static boolean available = true;
    /** Fully qualified name of the Igo tagger class, resolved reflectively. */
    private static final String PATTERN = "net.reduls.igo.Tagger";
    /** Language code; also used as the name of the dictionary directory. */
    private static final String LANG = "ja";
    // Part-of-speech names as they appear in the first field of a feature string.
    // English glosses: noun, adnominal, adverb, verb, conjunction, prefix,
    // auxiliary verb, particle, adjective, symbol, interjection, filler.
    private static final String MEISHI = "\u540d\u8a5e";
    private static final String RENTAISHI = "\u9023\u4f53\u8a5e";
    private static final String HUKUSHI = "\u526f\u8a5e";
    private static final String DOUSHI = "\u52d5\u8a5e";
    private static final String SETSUZOKUSHI = "\u63a5\u7d9a\u8a5e";
    private static final String SETTOUSHI = "\u63a5\u982d\u8a5e";
    private static final String JYODOUSHI = "\u52a9\u52d5\u8a5e";
    private static final String JYOSHI = "\u52a9\u8a5e";
    private static final String KEIYOUSHI = "\u5f62\u5bb9\u8a5e";
    private static final String KIGOU = "\u8a18\u53f7";
    private static final String KANDOUSHI = "\u611f\u52d5\u8a5e";
    private static final String FILLER = "\u30d5\u30a3\u30e9\u30fc";
    /** Feature string assigned to synthesized symbol (mark) morphemes. */
    private static final String KIGOU_FEATURE = "\u8a18\u53f7,*,*,*,*,*,*,*,*";
    /** Feature string for a generic noun morpheme. */
    private static final String MEISHI_FEATURE = "\u540d\u8a5e,*,*,*,*,*,*,*,*";
    /** Igo tagger instance, created reflectively in the static initializer. */
    private static Object tagger;
    /** Reflective handle on the tagger's {@code parse(CharSequence)} method. */
    private static Method parse;
    /** Reflective handle on the {@code surface} field of Igo's morpheme class. */
    private static Field surface;
    /** Reflective handle on the {@code feature} field of Igo's morpheme class. */
    private static Field feature;
    /** Reflective handle on the {@code start} field of Igo's morpheme class. */
    private static Field start;
    /** Iterator over the morphemes produced by the last {@code init} call. */
    private Iterator<Morpheme> tokens;
    /** Morphemes produced by the last {@code init} call; indexed by {@link #cpos} in wildcard mode. */
    private ArrayList<Morpheme> tokenList = new ArrayList();
    /** Current index into {@link #tokenList}; advanced by the wildcard scanner. */
    private int cpos;
    /** Morpheme selected by the last successful {@code hasNext} check. */
    private Morpheme currToken;
    /** Case option; defaults to {@code FTCase.INSENSITIVE} when no options are given. */
    private final FTCase cs;
    /** Value of the {@code FTFlag.DC} option; when false, diacritics are removed from non-ASCII tokens. */
    private final boolean dc;
    /** Value of the {@code FTFlag.WC} option; selects the wildcard-aware scanner. */
    private final boolean wc;
    /** Value of the {@code FTFlag.ST} option; when true, verbs and adjectives are replaced by their base form. */
    private final boolean st;
    /** Position of the current token; starts at -1 and is incremented per returned word token. */
    private int pos = -1;
    /** Set by {@code getSC}: true if the last returned token was a mark or an attached word. */
    private boolean sc;

    /*
     * Locates the Igo dictionary and binds the Igo classes via reflection.
     *
     * The dictionary is looked up first as a directory named "ja" and then as
     * "etc/ja" below Prop.HOMEDIR. The tokenizer is flagged as unavailable
     * whenever any required piece is missing: the tagger class, the
     * dictionary, the tagger instance, the parse method, or one of the
     * morpheme fields. Otherwise available() would advertise a tokenizer
     * whose init() cannot work.
     */
    static {
        IOFile dic = null;
        if (Reflect.available(PATTERN, new Object[0])) {
            dic = new IOFile(LANG);
            if (!dic.exists()) {
                // fall back to the dictionary inside the home directory
                dic = new IOFile(Prop.HOMEDIR, "etc/ja");
                if (!dic.exists()) {
                    available = false;
                }
            }
        } else {
            available = false;
        }
        if (available) {
            Class<?> clz = Reflect.find(PATTERN);
            if (clz == null) {
                available = false;
                Util.debug("Could not initialize Igo Japanese lexer.", new Object[0]);
            } else {
                Constructor<?> tgr = Reflect.find(clz, String.class);
                tagger = Reflect.get(tgr, dic.path());
                if (tagger == null) {
                    available = false;
                    Util.debug("Could not initialize Igo Japanese lexer.", new Object[0]);
                } else {
                    parse = Reflect.method(clz, "parse", CharSequence.class);
                    if (parse == null) {
                        available = false;
                        Util.debug("Could not initialize Igo lexer method.", new Object[0]);
                    }
                    Class<?> morphemeClz = Reflect.find("net.reduls.igo.Morpheme");
                    if (morphemeClz != null) {
                        surface = Reflect.field(morphemeClz, "surface");
                        feature = Reflect.field(morphemeClz, "feature");
                        start = Reflect.field(morphemeClz, "start");
                    }
                    if (surface == null || feature == null || start == null) {
                        // init() dereferences all three fields unconditionally
                        available = false;
                        Util.debug("Could not initialize Igo morpheme fields.", new Object[0]);
                    }
                }
            }
        }
    }

    /**
     * Reports whether the Japanese tokenizer can be used.
     * @return value of the {@code available} flag computed by the static initializer
     */
    static boolean available() {
        return available;
    }

    /**
     * Creates a tokenizer configured from the given full-text options.
     * Without options, matching is case-insensitive and all flags are off.
     * @param fto full-text options; may be {@code null}
     */
    JapaneseTokenizer(FTOpt fto) {
        if (fto == null) {
            this.cs = FTCase.INSENSITIVE;
            this.wc = false;
            this.dc = false;
            this.st = false;
        } else {
            this.cs = fto.cs == null ? FTCase.INSENSITIVE : fto.cs;
            this.wc = fto.is(FTFlag.WC);
            this.dc = fto.is(FTFlag.DC);
            this.st = fto.is(FTFlag.ST);
        }
    }

    /**
     * Factory method: creates a new Japanese tokenizer for the given options.
     * @param f full-text options, passed on to the constructor
     * @return new tokenizer instance
     */
    @Override
    Tokenizer get(FTOpt f) {
        return new JapaneseTokenizer(f);
    }

    /**
     * Initializes the tokenizer with a new text: runs the Igo tagger on it and
     * converts the result into the internal morpheme list.
     *
     * <p>Text that the tagger skipped between two consecutive morphemes is
     * re-inserted as a symbol morpheme. A morpheme whose surface consists solely
     * of single-byte characters that are neither letters nor digits is split
     * into one symbol morpheme per character.</p>
     *
     * <p>Errors raised while reading the tagger result are written to the error
     * output; the morphemes collected up to that point are kept.</p>
     *
     * @param txt text to be tokenized
     * @return self reference
     */
    @Override
    public JapaneseTokenizer init(byte[] txt) {
        String source = Token.string(txt);
        if (this.wc) {
            // wildcard mode: treat the ideographic space like an ordinary space
            source = source.replace('\u3000', ' ');
        }
        ArrayList<?> morphemes = (ArrayList<?>) Reflect.invoke(parse, tagger, source);
        ArrayList<Morpheme> list = new ArrayList<Morpheme>();
        try {
            // end offset (exclusive) of the previous morpheme in source
            int prev = 0;
            int ms = morphemes.size();
            for (int i = 0; i < ms; ++i) {
                Object m = morphemes.get(i);
                String srfc = surface.get(m).toString();
                String ftr = feature.get(m).toString();
                int strt = start.getInt(m);
                if (i != 0 && strt > prev) {
                    // Recover the skipped text [prev, strt). The former index
                    // arithmetic was only correct for a one-character gap.
                    list.add(new Morpheme(source.substring(prev, strt), KIGOU_FEATURE));
                }
                prev = strt + srfc.length();

                // check whether the surface consists of single-byte marks only
                boolean marksOnly = true;
                ArrayList<Morpheme> marks = new ArrayList<Morpheme>();
                int sl = srfc.length();
                for (int s = 0; s < sl; ++s) {
                    String c = String.valueOf(srfc.charAt(s));
                    byte[] t = Token.token(c);
                    if (t.length != 1 || Token.letter(t[0]) || Token.digit(t[0])) {
                        marksOnly = false;
                    } else {
                        marks.add(new Morpheme(c, KIGOU_FEATURE));
                    }
                }
                if (marksOnly) {
                    list.addAll(marks);
                } else {
                    list.add(new Morpheme(srfc, ftr));
                }
            }
        }
        catch (Exception ex) {
            Util.errln(String.valueOf(Util.className(this)) + ": " + ex, new Object[0]);
        }
        this.tokenList = list;
        this.tokens = list.iterator();
        return this;
    }

    /**
     * Checks whether a surface string is exactly one of the characters
     * {@code . ? * + \ { }}.
     * @param s string to be checked; may be {@code null}
     * @return {@code true} if the string is one of the listed characters
     */
    private static boolean isFtChar(String s) {
        return s != null && s.length() == 1 && ".?*+\\{}".indexOf(s.charAt(0)) >= 0;
    }

    /*
     * Unable to fully structure code
     */
    private boolean moreWC() {
        word = new StringBuilder();
        size = this.tokenList.size();
        period = false;
        bs = false;
        more = false;
        while (this.cpos < size) {
            block20: {
                block18: {
                    block22: {
                        block21: {
                            block19: {
                                cSrfc = this.tokenList.get(this.cpos).getSurface();
                                cMark = this.tokenList.get(this.cpos).isMark();
                                nSrfc = null;
                                nMark = false;
                                if (this.cpos < size - 1) {
                                    nSrfc = this.tokenList.get(this.cpos + 1).getSurface();
                                    nMark = this.tokenList.get(this.cpos + 1).isMark();
                                }
                                if (nSrfc == null) break block18;
                                if ("\\".equals(cSrfc)) {
                                    bs = true;
                                }
                                if ((!cMark || JapaneseTokenizer.isFtChar(cSrfc)) && (!"\\".equals(cSrfc) || !nMark)) break block19;
                                period = false;
                                bs = false;
                                if (word.length() != 0) {
                                    more = true;
                                    break;
                                }
                                if ("\\".equals(cSrfc) && nMark) {
                                    ++this.cpos;
                                }
                                break block20;
                            }
                            word.append(cSrfc);
                            if (!bs && !"\\".equals(nSrfc)) break block21;
                            more = true;
                            break block20;
                        }
                        if (!".".equals(cSrfc) && !".".equals(nSrfc)) break block22;
                        period = true;
                        break block20;
                    }
                    if (!period) ** GOTO lbl-1000
                    if ("{".equals(cSrfc)) {
                        ++this.cpos;
                        while (this.cpos < size) {
                            cSrfc = this.tokenList.get(this.cpos).getSurface();
                            word.append(cSrfc);
                            if ("}".equals(cSrfc)) {
                                more = true;
                                break;
                            }
                            ++this.cpos;
                        }
                        ++this.cpos;
                        break;
                    }
                    break block20;
                }
                if (cMark) {
                    if (!"\\".equals(cSrfc)) {
                        if (word.length() != 0) {
                            word.append(cSrfc);
                        }
                        more = true;
                    }
                } else lbl-1000:
                // 2 sources

                {
                    if (period) {
                        word.append(cSrfc);
                    } else if (bs) {
                        if (!JapaneseTokenizer.isFtChar(cSrfc)) {
                            word.append(cSrfc);
                        } else {
                            word.setLength(0);
                        }
                    }
                    more = true;
                    ++this.cpos;
                    break;
                }
            }
            ++this.cpos;
        }
        if (more) {
            this.currToken = word.length() == 0 ? this.tokenList.get(this.cpos - 1) : new Morpheme(word.toString(), "\u540d\u8a5e,*,*,*,*,*,*,*,*");
        }
        return more;
    }

    /*
     * Unable to fully structure code
     */
    private boolean more() {
        if (!this.all) ** GOTO lbl6
        return this.tokens.hasNext();
lbl-1000:
        // 1 sources

        {
            this.currToken = this.tokens.next();
            if (this.currToken.isMark() || this.currToken.isAttachedWord()) continue;
            return true;
lbl6:
            // 2 sources

            ** while (this.tokens.hasNext())
        }
lbl7:
        // 1 sources

        return false;
    }

    /**
     * Checks if another token is available, using the wildcard-aware scanner
     * when the wildcard option is set.
     * @return {@code true} if another token is available
     */
    @Override
    public boolean hasNext() {
        if (this.wc) {
            return this.moreWC();
        }
        return this.more();
    }

    /**
     * Returns the next token as a span.
     * The token is fetched first, so {@code pos} and {@code sc} already reflect
     * the returned token when the span is built.
     * @return span holding the token bytes, the current position and the {@code sc} flag
     */
    @Override
    public FTSpan next() {
        return new FTSpan(this.nextToken(), this.pos, this.sc);
    }

    /**
     * Returns the normalized form of the current token and advances the position.
     * With stemming enabled, verbs and adjectives are replaced by their base
     * form. Diacritics are removed from non-ASCII tokens unless the diacritics
     * option is set, the case option is applied, and full-width characters are
     * converted to their half-width counterparts.
     * @return normalized token
     */
    private byte[] get() {
        ++this.pos;
        String text = this.currToken.getSurface();
        final int hinshi = this.currToken.getHinshi();
        // 4 and 8 are Morpheme.HINSHI_DOUSHI (verb) and Morpheme.HINSHI_KEIYOUSHI (adjective)
        final boolean inflected = hinshi == 4 || hinshi == 8;
        if (this.st && inflected) {
            text = this.currToken.getBaseForm();
        }

        byte[] token = Token.token(text);
        final boolean ascii = Token.ascii(token);
        if (!ascii && !this.dc) {
            token = FTToken.noDiacritics(token);
        }

        if (this.cs == FTCase.UPPER) {
            token = WesternTokenizer.upper(token, ascii);
        } else if (this.cs != FTCase.SENSITIVE) {
            token = WesternTokenizer.lower(token, ascii);
        }
        return JapaneseTokenizer.toHankaku(token);
    }

    /**
     * Returns the unmodified surface of the next morpheme. Marks and attached
     * words set the {@code sc} flag and leave the position untouched; all other
     * morphemes clear the flag and advance the position.
     * @return surface of the next morpheme as token
     */
    private byte[] getSC() {
        final Morpheme next = this.tokens.next();
        final String text = next.getSurface();
        final boolean special = next.isMark() || next.isAttachedWord();
        if (!special) {
            ++this.pos;
        }
        this.sc = special;
        return Token.token(text);
    }

    /**
     * Returns the next token: the raw surface if the inherited {@code original}
     * flag is set, the normalized token otherwise.
     * @return next token
     */
    @Override
    public byte[] nextToken() {
        if (this.original) {
            return this.getSC();
        }
        return this.get();
    }

    /**
     * Returns the constant 20.
     * NOTE(review): presumably the precedence that ranks this tokenizer against
     * other implementations for the same language - confirm against Tokenizer.
     * @return 20
     */
    @Override
    protected byte prec() {
        return 20;
    }

    /**
     * Returns the languages handled by this tokenizer.
     * @return collection built by {@code collection} from the language code {@code "ja"}
     */
    @Override
    Collection<Language> languages() {
        return JapaneseTokenizer.collection(LANG);
    }

    /**
     * Converts full-width (zenkaku) characters of a token to their half-width
     * (hankaku) ASCII counterparts. Pure ASCII tokens are returned unchanged.
     *
     * <p>Mapped code points:</p>
     * <ul>
     *   <li>U+FF01..U+FF5E except U+FF3C: shifted down by 0xFEE0 to U+0021..U+007E</li>
     *   <li>U+3000 (ideographic space): U+0020</li>
     *   <li>U+201C, U+201D (curly double quotes): U+0022</li>
     *   <li>U+2018, U+2019 (curly single quotes): U+0027</li>
     *   <li>U+FFE5 (full-width yen sign): U+005C</li>
     * </ul>
     * All other code points, including U+FF3C, are copied unchanged.
     *
     * @param text token to be converted
     * @return converted token
     */
    private static byte[] toHankaku(byte[] text) {
        if (Token.ascii(text)) {
            return text;
        }
        final int len = text.length;
        final TokenBuilder out = new TokenBuilder(len);
        for (int i = 0; i < len; i += Token.cl(text, i)) {
            final int cp = Token.cp(text, i);
            final int mapped;
            if (cp >= 0xFF01 && cp <= 0xFF5E && cp != 0xFF3C) {
                // full-width ASCII variants sit at a fixed offset from ASCII
                mapped = cp - 0xFEE0;
            } else {
                switch (cp) {
                    case 0x3000:
                        mapped = 0x20;
                        break;
                    case 0x201C:
                    case 0x201D:
                        mapped = 0x22;
                        break;
                    case 0x2018:
                    case 0x2019:
                        mapped = 0x27;
                        break;
                    case 0xFFE5:
                        mapped = 0x5C;
                        break;
                    default:
                        mapped = cp;
                }
            }
            out.add(mapped);
        }
        return out.finish();
    }

    /**
     * Morpheme: a surface string together with its comma-separated feature
     * string. The first feature field holds the part of speech, the seventh
     * field the base form.
     */
    private static final class Morpheme {
        /** Part of speech: noun. */
        private static final int HINSHI_MEISHI = 1;
        /** Part of speech: adnominal. */
        private static final int HINSHI_RENTAISHI = 2;
        /** Part of speech: adverb. */
        private static final int HINSHI_HUKUSHI = 3;
        /** Part of speech: verb. */
        private static final int HINSHI_DOUSHI = 4;
        /** Part of speech: conjunction. */
        private static final int HINSHI_SETSUZOKUSHI = 5;
        /** Part of speech: auxiliary verb. */
        private static final int HINSHI_JYODOUSHI = 6;
        /** Part of speech: particle. */
        private static final int HINSHI_JYOSHI = 7;
        /** Part of speech: adjective. */
        private static final int HINSHI_KEIYOUSHI = 8;
        /** Part of speech: symbol (mark). */
        private static final int HINSHI_KIGOU = 9;
        /** Part of speech: interjection. */
        private static final int HINSHI_KANDOUSHI = 10;
        /** Part of speech: filler. */
        private static final int HINSHI_FILLER = 11;
        /** Part of speech: prefix. */
        private static final int HINSHI_SETTOUSHI = 12;
        /** Part of speech: anything else. */
        private static final int HINSHI_SONOTA = 0;
        /** Surface form. */
        private final String mSurface;
        /** Comma-separated feature string. */
        private final String mFeature;

        /**
         * Constructor.
         * @param srfc surface form
         * @param ftr comma-separated feature string
         */
        private Morpheme(String srfc, String ftr) {
            this.mSurface = srfc;
            this.mFeature = ftr;
        }

        /**
         * Returns the surface form.
         * @return surface
         */
        public String getSurface() {
            return this.mSurface;
        }

        /**
         * Checks if this morpheme is a mark, i.e. a symbol or a filler.
         * @return result of check
         */
        public boolean isMark() {
            final int code = this.getHinshi();
            return code == HINSHI_KIGOU || code == HINSHI_FILLER;
        }

        /**
         * Checks if this morpheme is an attached word, i.e. an auxiliary verb
         * or a particle.
         * @return result of check
         */
        public boolean isAttachedWord() {
            final int code = this.getHinshi();
            return code == HINSHI_JYODOUSHI || code == HINSHI_JYOSHI;
        }

        /**
         * Returns the part-of-speech code of this morpheme. A surface that
         * encodes to a single byte which is neither a letter nor a digit is
         * always classified as a symbol; otherwise the first feature field
         * decides.
         * @return one of the {@code HINSHI_*} codes
         */
        public int getHinshi() {
            final byte[] bytes = Token.token(this.mSurface);
            if (bytes.length == 1 && !Token.letter(bytes[0]) && !Token.digit(bytes[0])) {
                return HINSHI_KIGOU;
            }
            switch (this.getPos()) {
                case "\u540d\u8a5e":
                    return HINSHI_MEISHI;
                case "\u9023\u4f53\u8a5e":
                    return HINSHI_RENTAISHI;
                case "\u526f\u8a5e":
                    return HINSHI_HUKUSHI;
                case "\u52d5\u8a5e":
                    return HINSHI_DOUSHI;
                case "\u63a5\u7d9a\u8a5e":
                    return HINSHI_SETSUZOKUSHI;
                case "\u63a5\u982d\u8a5e":
                    return HINSHI_SETTOUSHI;
                case "\u52a9\u52d5\u8a5e":
                    return HINSHI_JYODOUSHI;
                case "\u52a9\u8a5e":
                    return HINSHI_JYOSHI;
                case "\u5f62\u5bb9\u8a5e":
                    return HINSHI_KEIYOUSHI;
                case "\u8a18\u53f7":
                    return HINSHI_KIGOU;
                case "\u611f\u52d5\u8a5e":
                    return HINSHI_KANDOUSHI;
                case "\u30d5\u30a3\u30e9\u30fc":
                    return HINSHI_FILLER;
                default:
                    return HINSHI_SONOTA;
            }
        }

        /**
         * Returns the base form, i.e. the seventh field of the feature string.
         * @return base form
         */
        public String getBaseForm() {
            final String[] fields = Strings.split(this.mFeature, ',');
            return fields[6];
        }

        /**
         * Returns the part-of-speech name, i.e. the first field of the feature string.
         * @return part-of-speech name
         */
        private String getPos() {
            final String[] fields = Strings.split(this.mFeature, ',');
            return fields[0];
        }

        @Override
        public String toString() {
            return this.mSurface;
        }
    }
}

