using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;

using SlothLib.NLP;
using System.Text.RegularExpressions;
using System.IO;

namespace SlothLibTests
{
    public partial class FormSandBoxInagawa : Form
    {
        /// <summary>
        /// Creates the sandbox form and runs the designer-generated component setup.
        /// </summary>
        public FormSandBoxInagawa()
        {
            this.InitializeComponent();
        }

        /// <summary>
        /// Click handler: analyzes the text in <c>richTextBox1</c> with TreeTagger and
        /// writes one line per morpheme to <c>richTextBox2</c>, consisting of the
        /// morpheme's <c>Raw</c>, <c>Original</c> and <c>POS</c> values separated by tabs.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btnStart_Click(object sender, EventArgs e)
        {
            // A previous experiment (disabled) ran EnglishTokenizer.DoTokenize on the
            // input and listed the resulting tokens one per line instead.

            // The TreeTagger location is hard-coded for the author's machine.
            TreeTagger treeTagger = new TreeTagger(@"C:\Data\Downloads\TreeTagger\bin");
            TreeTaggerResult analysis = treeTagger.DoAnalyze(richTextBox1.Text);

            StringBuilder output = new StringBuilder();
            foreach (Morpheme morpheme in analysis.Morphemes)
            {
                output.Append(morpheme.Raw)
                      .Append("\t")
                      .Append(morpheme.Original)
                      .Append("\t")
                      .AppendLine(morpheme.POS);
            }

            richTextBox2.Text = output.ToString();
        }

        /// <summary>
        /// Click handler: runs SSTagger (with stemming enabled) over the text in
        /// <c>richTextBox1</c> and writes each resulting morpheme's <c>ToString()</c>
        /// on its own line to <c>richTextBox2</c>.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // This handler used to call getAnalyzedStrings(richTextBox1.Text) first, but the
            // loop consuming its result had an empty body, so the analysis output was thrown
            // away and the text box was assigned an empty string that was overwritten below.
            // That dead work has been removed.

            // The SSTagger executable location is hard-coded for the author's machine.
            SSTaggerServer.SSTaggerPath = @"C:\Data\Downloads\SSTagger\tagger.exe";
            SSTaggerServer.DoStemming = true;

            StringBuilder builder = new StringBuilder();
            foreach (Morpheme w in SSTaggerServer.DoSSTagger(richTextBox1.Text).Morphemes)
            {
                builder.AppendLine(w.ToString());
            }
            richTextBox2.Text = builder.ToString();

            // A disabled alternative tokenized the input with EnglishTokenizer (optionally
            // followed by PorterStemmerFilter) and listed the tokens one per line.
        }

        #region getAnalyzedStrings
        /// <summary>
        /// Runs MeCab morphological analysis on <paramref name="src"/> and returns the
        /// strings produced by passing the resulting morphemes through a chain of filters
        /// (part-of-speech, original-form, stop-word and regex filters, in that order).
        /// </summary>
        /// <param name="src">Text to analyze. It is first passed through
        /// <c>Filter.AlphaDigitToNarrowHalfKanaToWide</c>.</param>
        /// <returns>A new list holding the output of
        /// <c>MorphemeFilterList.DoMorphemeToStringFilter</c> for the analyzed morphemes.</returns>
        /// <remarks>
        /// The stop-word symbol and word lists are loaded from hard-coded paths under
        /// C:\svn\SlothLib on every call.
        /// NOTE(review): several string literals below look mis-encoded (presumably Japanese
        /// text saved in a legacy encoding and read back with the wrong one) - restore them
        /// from the original source before relying on this method.
        /// </remarks>
        public static List<string> getAnalyzedStrings(string src)
        {
            // Normalize character widths before analysis (judging by the helper's name:
            // alphanumerics to narrow, half-width kana to wide - confirm against Filter).
            src = Filter.AlphaDigitToNarrowHalfKanaToWide(src);

            //IMorphemeSequence result = MeCabServer.DoMeCab(src);
            IMorphologicalAnalyzer analyzer = new MeCab();
            IMorphologicalAnalyzerResult result = analyzer.DoAnalyze(src);

            // Filters are applied through a single MorphemeFilterList; the order in which
            // they are added is kept as-is.
            MorphemeFilterList filterList = new MorphemeFilterList();
            // NOTE(review): both PosFilter arguments are mis-encoded; their intended
            // part-of-speech patterns cannot be determined from this file.
            filterList.Add(new PosFilter("|`e|`e|,|m", ",㖼"));
            filterList.Add(new RemainOriginalFilter());
            //filterList.Add(new AlphaDigitToNarrowHalfKanaToWideFilter());

            // Stop-word filter: symbol list and word list come from disk, plus a few
            // extra words added inline.
            StopWordFilter swFilter = new StopWordFilter();
            swFilter.LoadSymbolList(@"C:\svn\SlothLib\SlothLibDll\latest\StopWord\symbol");
            swFilter.LoadWordList(@"C:\svn\SlothLib\SlothLibDll\latest\StopWord\word\");
            // NOTE(review): the inline stop words are mis-encoded (several appear as empty
            // strings) - verify against the original source.
            swFilter.AddToWordList("", "v", "", "", "X", "Ƃ킴", "", "i", "", "Ȃ",
                                 "", "ɂ");
            filterList.Add(swFilter);

            // The pattern matches strings consisting solely of ASCII letters. It is passed as
            // the second RegexFilter argument (the first is null); presumably that is the
            // "exclude" pattern, so such tokens are dropped - confirm against RegexFilter.
            // (The original trailing comment here was mis-encoded and unreadable.)
            Regex exclude = new Regex("^[a-zA-Z]+$", RegexOptions.Compiled);
            RegexFilter regexFilter = new RegexFilter(null, exclude);
            filterList.Add(regexFilter);

            return new List<string>(filterList.DoMorphemeToStringFilter(result.Morphemes));
        }
        #endregion

    }
}