﻿using System;
using System.IO;
using System.Text;

namespace Dobon
{
    /// <summary>
    /// Heuristic detector for the character encoding of (mainly Japanese) text:
    /// ISO-2022-JP, Shift_JIS, EUC-JP, UTF-8 and UTF-16.
    /// </summary>
    /// <remarks>
    /// Ported from the getcode method of Jcode.pm
    /// (http://openlab.ring.gr.jp/Jcode/index-j.html).
    /// The scratch buffer and the running counters are instance fields, so one
    /// instance must not run several detections at the same time.
    /// </remarks>
    class Jcode
    {
        // Bytes that make up the ISO-2022-JP escape sequences looked for below.
        private const byte Escape = 0x1B;
        private const byte AtSign = 0x40;       // @
        private const byte Dollar = 0x24;       // $
        private const byte Ampersand = 0x26;    // &
        private const byte OpenParen = 0x28;    // (
        private const byte UpperB = 0x42;
        private const byte UpperD = 0x44;
        private const byte UpperJ = 0x4A;
        private const byte UpperI = 0x49;

        // Code pages handed to Encoding.GetEncoding.
        private const int CodePageShiftJis = 932;
        private const int CodePageIso2022Jp = 50220;
        private const int CodePageEucJp = 51932;

        // Bytes of the previous chunk kept in front of the next one (the UTF-16 test reads byts[i - 1]).
        private const int PreBytes = 1;
        // Bytes held back at the end of a full buffer so that a scan can look ahead (up to byts[i + 5]).
        private const int PostBytes = 5;

        private readonly byte[] buf = new byte[16 * 1024];

        // Running totals, accumulated over all chunks of one detection.
        private int binary;     // bytes that look like binary data
        private int ucs2;       // 0x00 bytes that directly follow a 7-bit byte
        private int sjis;       // bytes matched as Shift_JIS sequences
        private int euc;        // bytes matched as EUC-JP sequences
        private int utf8;       // bytes matched as UTF-8 multi-byte sequences

        /// <summary>
        /// Clears every counter before a new detection starts.
        /// </summary>
        private void reset()
        {
            binary = 0;
            ucs2 = 0;
            sjis = 0;
            euc = 0;
            utf8 = 0;
        }

        /// <summary>
        /// Guesses the character encoding of the data in a stream.
        /// </summary>
        /// <remarks>
        /// The stream is read from its current position in chunks of at most the
        /// buffer size. Reading stops as soon as a chunk gives a definite answer;
        /// otherwise it continues until the stream is exhausted and the encoding
        /// with the strictly highest score wins.
        /// NOTE(review): the scanning loops of the chunk overload stop at
        /// <c>end - 1</c>, and that byte lies before <c>begin</c> in the following
        /// chunk, so a sequence that starts exactly on the last byte of a non-final
        /// chunk is not counted.
        /// </remarks>
        /// <param name="stream">Stream holding the data to examine.</param>
        /// <returns>
        /// The encoding judged most likely; never null. When the scores do not
        /// single out an encoding, <see cref="Encoding.Default"/> is returned.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        public Encoding GetCode( Stream stream )
        {
            if (stream == null) {
                throw new ArgumentNullException( "stream" );
            }

            reset();

            int len = buf.Length;

            int begin = 0;      // first byte of the chunk to classify
            int end = 0;        // one past the last byte to classify
            int end_buf = 0;    // one past the last valid byte in buf (look-ahead included)
            while (true) {
                int pos = 0;
                {// move the tail of the chunk just classified to the front (context bytes)
                    int _begin = Math.Max( begin, end - PreBytes );
                    for (int n = _begin; n < end; ++n) {
                        buf[pos++] = buf[n];
                    }
                    begin = pos;
                }
                {// move the look-ahead bytes that were read but not yet classified
                    for (int n = end; n < end_buf; ++n) {
                        buf[pos++] = buf[n];
                    }
                }
                // fill the rest of the buffer from the stream
                end_buf = pos + stream.Read( buf, pos, len - pos );
                // when the buffer is full, keep the last PostBytes bytes as look-ahead only
                end = Math.Min( len - PostBytes, end_buf );

                // nothing new to classify: the stream is exhausted
                if (begin == end) { break; }

                Encoding enc = GetCode( buf, begin, end, end_buf );
                if (enc != null) {
                    return enc;
                }
            }

            // No chunk was conclusive: pick the encoding with the strictly highest score.
            if (euc > sjis && euc > utf8) {
                return Encoding.GetEncoding( CodePageEucJp );
            } else if (sjis > euc && sjis > utf8) {
                return Encoding.GetEncoding( CodePageShiftJis );
            } else if (utf8 > euc && utf8 > sjis) {
                return Encoding.UTF8;
            }

            return Encoding.Default;
        }

        /// <summary>
        /// Classifies one chunk of data and adds its scores to the running counters.
        /// </summary>
        /// <param name="byts">Buffer holding the data.</param>
        /// <param name="begin">Index of the first byte to classify.</param>
        /// <param name="end">Index one past the last byte to classify.</param>
        /// <param name="end_buf">Index one past the last valid byte in
        /// <paramref name="byts"/>; bytes in [end, end_buf) are only read as look-ahead.</param>
        /// <returns>
        /// An encoding when the chunk is conclusive (binary or UTF-16 looking data,
        /// or an escape sequence); otherwise null after updating the counters.
        /// </returns>
        private Encoding GetCode( byte[] byts, int begin, int end, int end_buf )
        {
            // Control bytes 0x00-0x06, 0x7F and 0xFF are counted as binary data.
            for (int i = begin; i < end; i++) {
                byte b = byts[i];
                if (b <= 0x06 || b == 0x7F || b == 0xFF) {
                    binary++;
                    if (0 < i && i < end - 1 && byts[i - 1] <= 0x7F && b == 0x00) {
                        // a 7-bit byte followed by 0x00 smells like raw Unicode
                        ucs2++;
                    }
                }
            }

            if (binary > 0) {
                if (ucs2 > 0) {
                    // UCS-2 / UTF-16
                    return Encoding.Unicode;
                } else {
                    // binary looking data is reported as UTF-8 rather than null
                    return Encoding.UTF8;
                }
            }

            // ISO-2022-JP announces itself with escape sequences.
            for (int i = begin; i < end - 1; i++) {
                if (byts[i] != Escape) { continue; }

                if (byts[i + 1] >= 0x80) {
                    // ESC followed by an 8-bit byte: not Japanese, report ASCII
                    return Encoding.ASCII;
                }
                if (IsJisEscape( byts, i, end_buf )) {
                    return Encoding.GetEncoding( CodePageIso2022Jp );
                }
            }

            // Shift_JIS: lead byte 0x81-0x9F or 0xE0-0xFC, trail byte 0x40-0x7E or 0x80-0xFC.
            for (int i = begin; i < end - 1; i++) {
                byte b1 = byts[i];
                byte b2 = byts[i + 1];
                if (((0x81 <= b1 && b1 <= 0x9F) || (0xE0 <= b1 && b1 <= 0xFC)) &&
                    ((0x40 <= b2 && b2 <= 0x7E) || (0x80 <= b2 && b2 <= 0xFC))) {
                    sjis += 2;
                    i++;
                }
            }

            // EUC-JP: two bytes in 0xA1-0xFE, 0x8E + 0xA1-0xDF, or 0x8F + two bytes in 0xA1-0xFE.
            for (int i = begin; i < end - 1; i++) {
                byte b1 = byts[i];
                byte b2 = byts[i + 1];
                if (((0xA1 <= b1 && b1 <= 0xFE) && (0xA1 <= b2 && b2 <= 0xFE)) ||
                    (b1 == 0x8E && (0xA1 <= b2 && b2 <= 0xDF))) {
                    euc += 2;
                    i++;
                } else if (i + 2 < end_buf) {
                    byte b3 = byts[i + 2];
                    if (b1 == 0x8F && (0xA1 <= b2 && b2 <= 0xFE) && (0xA1 <= b3 && b3 <= 0xFE)) {
                        euc += 3;
                        i += 2;
                    }
                }
            }

            // UTF-8: 2-byte sequences (lead 0xC0-0xDF) and 3-byte sequences (lead 0xE0-0xEF).
            for (int i = begin; i < end - 1; i++) {
                byte b1 = byts[i];
                byte b2 = byts[i + 1];
                if ((0xC0 <= b1 && b1 <= 0xDF) && (0x80 <= b2 && b2 <= 0xBF)) {
                    utf8 += 2;
                    i++;
                } else if (i + 2 < end_buf) {
                    // the third byte only has to lie inside the valid data, so a
                    // sequence at the very end of the input is counted as well
                    byte b3 = byts[i + 2];
                    if ((0xE0 <= b1 && b1 <= 0xEF) && (0x80 <= b2 && b2 <= 0xBF) && (0x80 <= b3 && b3 <= 0xBF)) {
                        utf8 += 3;
                        i += 2;
                    }
                }
            }

            return null;
        }

        /// <summary>
        /// Tells whether the ESC byte at <paramref name="i"/> starts one of the
        /// ISO-2022-JP escape sequences.
        /// </summary>
        /// <param name="byts">Buffer holding the data; <c>byts[i]</c> is ESC and
        /// <c>byts[i + 1]</c> is valid.</param>
        /// <param name="i">Index of the ESC byte.</param>
        /// <param name="end_buf">Index one past the last valid byte in <paramref name="byts"/>.</param>
        /// <returns>true when a known escape sequence starts at <paramref name="i"/>.</returns>
        private static bool IsJisEscape( byte[] byts, int i, int end_buf )
        {
            if (i + 2 >= end_buf) {
                // even the shortest sequence needs three bytes
                return false;
            }

            byte b2 = byts[i + 1];
            byte b3 = byts[i + 2];

            if (b2 == Dollar && (b3 == AtSign || b3 == UpperB)) {
                // ESC $ @ (JIS_0208 1978) or ESC $ B (JIS_0208 1983)
                return true;
            }
            if (i + 5 < end_buf && b2 == Ampersand && b3 == AtSign &&
                byts[i + 3] == Escape && byts[i + 4] == Dollar && byts[i + 5] == UpperB) {
                // ESC & @ ESC $ B (JIS_0208 1990)
                return true;
            }
            if (i + 3 < end_buf && b2 == Dollar && b3 == OpenParen && byts[i + 3] == UpperD) {
                // ESC $ ( D (JIS_0212)
                return true;
            }
            if (b2 == OpenParen && (b3 == UpperB || b3 == UpperJ || b3 == UpperI)) {
                // ESC ( B / ESC ( J (JIS_ASC) or ESC ( I (JIS_KANA)
                return true;
            }

            return false;
        }
    }
}
