using System;
using System.Collections.Generic;
using System.Text;
using System.IO;

namespace SlothLib.NLP
{
    /// <summary>
    /// String filter that rejects stop words.
    /// A string is rejected when its lowercased form exactly matches an entry of the
    /// word list, or contains any entry of the symbol list as a substring.
    /// </summary>
	public class StopWordFilter : AbstractStringFilter
	{


		#region private fields

		// Stop words, stored as dictionary keys for fast exact-match lookup.
		// The bool value carries no meaning; only the key is used.
		private readonly Dictionary<string, bool> wordList;

		// Symbols; a string containing any one of these is treated as a stop word.
		private readonly List<string> symbolList;

		#endregion


		#region constructor

		/// <summary>
		/// Creates a filter whose word list and symbol list are both empty.
		/// </summary>
		public StopWordFilter()
		{
			wordList = new Dictionary<string, bool>();
			symbolList = new List<string>();
		}

		#endregion

        /// <summary>
        /// Applies the filter: words are rejected on an exact match, symbols on a partial
        /// (substring) match.
        /// </summary>
        /// <remarks>
        /// The input is lowercased before both checks, so list entries that contain
        /// uppercase letters never match.
        /// </remarks>
        /// <param name="str">String to filter.</param>
        /// <returns>
        /// <paramref name="str"/> unchanged when it passes the filter;
        /// null when it is rejected or when <paramref name="str"/> is null.
        /// </returns>
		public override string DoFilter(string str)
		{
			// A null input cannot pass the filter; report it as rejected instead of crashing.
			if (str == null)
			{
				return null;
			}

			// Invariant lowercasing keeps the result independent of the current culture
			// (e.g. the Turkish dotless-i rule would otherwise turn "I" into "ı").
			string lower = str.ToLowerInvariant();

			// Exact match against the word list.
			if (wordList.ContainsKey(lower))
			{
				return null;
			}

			// Substring match against the symbol list.
			foreach (string symbol in symbolList)
			{
				// Ordinal search: compare raw characters, no linguistic matching rules.
				if (lower.IndexOf(symbol, StringComparison.Ordinal) >= 0)
				{
					return null;
				}
			}

			// Passed every check.
			return str;
		}



		#region Add, Remove, Clear

		/// <summary>
		/// Adds words to the word list. Null or empty entries and duplicates are ignored.
		/// </summary>
		/// <param name="words">Words to add.</param>
		/// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
		public void AddToWordList(params string[] words)
		{
			if (words == null)
			{
				throw new ArgumentNullException("words");
			}

			foreach (string word in words)
			{
				if (!string.IsNullOrEmpty(word))
				{
					// The indexer is idempotent, so duplicates need no prior lookup.
					// The stored value (false) has no meaning.
					wordList[word] = false;
				}
			}
		}

		/// <summary>
		/// Removes words from the word list. Entries that are null or not in the list are ignored.
		/// </summary>
		/// <param name="words">Words to remove.</param>
		/// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
		public void RemoveFromWordList(params string[] words)
		{
			if (words == null)
			{
				throw new ArgumentNullException("words");
			}

			foreach (string word in words)
			{
				if (word != null)
				{
					// Remove simply returns false when the key is absent.
					wordList.Remove(word);
				}
			}
		}

		/// <summary>
		/// Removes every entry from the word list.
		/// </summary>
		public void ClearWordList()
		{
			wordList.Clear();
		}

		/// <summary>
		/// Adds symbols to the symbol list. Null or empty entries and duplicates are ignored.
		/// </summary>
		/// <param name="words">Symbols to add.</param>
		/// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
		public void AddToSymbolList(params string[] words)
		{
			if (words == null)
			{
				throw new ArgumentNullException("words");
			}

			foreach (string word in words)
			{
				if (!string.IsNullOrEmpty(word) && !symbolList.Contains(word))
				{
					symbolList.Add(word);
				}
			}
		}

		/// <summary>
		/// Removes symbols from the symbol list. Entries that are null or not in the list are ignored.
		/// </summary>
		/// <param name="words">Symbols to remove.</param>
		/// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
		public void RemoveFromSymbolList(params string[] words)
		{
			if (words == null)
			{
				throw new ArgumentNullException("words");
			}

			foreach (string word in words)
			{
				if (word != null)
				{
					// Remove simply returns false when the item is absent.
					symbolList.Remove(word);
				}
			}
		}

		/// <summary>
		/// Removes every entry from the symbol list.
		/// </summary>
		public void ClearSymbolList()
		{
			symbolList.Clear();
		}

		#endregion



		#region LoadWordList

		/// <summary>
		/// Loads stop word list files (one word per line) and adds their words to the word list.
		/// Files are opened with <see cref="File.OpenText(string)"/>, i.e. read as UTF-8.
		/// A string is judged a stop word only when it matches an entry exactly; the string is
		/// lowercased before it is compared with the list.
		/// </summary>
		/// <remarks>
		/// Loading is additive: existing entries are kept. Lines read before a failure are kept too.
		/// </remarks>
		/// <param name="filePaths">Paths of the files to read.</param>
		/// <returns>
		/// true when every file was read without problems; false when
		/// <paramref name="filePaths"/> is null, a file does not exist, or a file could not be read.
		/// File access errors are reported through the return value, not thrown.
		/// </returns>
		public bool LoadWordList(string[] filePaths)
		{
			if (filePaths == null)
			{
				return false;
			}

			bool result = true;

			foreach (string path in filePaths)
			{
				List<string> lines = new List<string>();
				if (!TryReadNonEmptyLines(path, lines))
				{
					result = false;
				}

				// Add whatever was read, even if the file failed part-way through.
				AddToWordList(lines.ToArray());
			}

			return result;
		}

		/// <summary>
		/// Loads every file in a directory as a stop word list file.
		/// A string is judged a stop word only when it matches an entry exactly; the string is
		/// lowercased before it is compared with the list.
		/// </summary>
		/// <remarks>
		/// The success flag of the per-file load is discarded, so a file that cannot be read
		/// is skipped silently.
		/// </remarks>
		/// <param name="dirPath">Path of the directory holding text files that list stop words.</param>
		/// <exception cref="DirectoryNotFoundException">The directory does not exist.</exception>
		public void LoadWordList(string dirPath)
		{
			if (!Directory.Exists(dirPath))
			{
				// Message (Japanese): "The specified stop word list directory does not exist."
				throw new DirectoryNotFoundException("指定されたストップワードリストのディレクトリが存在しません。");
			}

			LoadWordList(Directory.GetFiles(dirPath));
		}

		#endregion


		#region LoadSymbolList

		/// <summary>
		/// Loads symbol list files (one symbol per line) and adds their symbols to the symbol list.
		/// Any string that contains one of the symbols is unconditionally judged a stop word.
		/// </summary>
		/// <remarks>
		/// Loading is additive: existing entries are kept. Lines read before a failure are kept too.
		/// </remarks>
		/// <param name="filePaths">Paths of the files to read.</param>
		/// <returns>
		/// true when every file was read without problems; false when
		/// <paramref name="filePaths"/> is null, a file does not exist, or a file could not be read.
		/// File access errors are reported through the return value, not thrown.
		/// </returns>
		public bool LoadSymbolList(string[] filePaths)
		{
			if (filePaths == null)
			{
				return false;
			}

			bool result = true;

			foreach (string path in filePaths)
			{
				List<string> lines = new List<string>();
				if (!TryReadNonEmptyLines(path, lines))
				{
					result = false;
				}

				// Add whatever was read, even if the file failed part-way through.
				AddToSymbolList(lines.ToArray());
			}

			return result;
		}

		/// <summary>
		/// Loads every file in a directory as a symbol list file.
		/// Any string that contains one of the symbols is unconditionally judged a stop word.
		/// </summary>
		/// <remarks>
		/// The success flag of the per-file load is discarded, so a file that cannot be read
		/// is skipped silently.
		/// </remarks>
		/// <param name="dirPath">Path of the directory holding text files that list symbols.</param>
		/// <exception cref="DirectoryNotFoundException">The directory does not exist.</exception>
		public void LoadSymbolList(string dirPath)
		{
			if (!Directory.Exists(dirPath))
			{
				// Message (Japanese): "The specified symbol list directory does not exist."
				throw new DirectoryNotFoundException("指定された記号リストのディレクトリが存在しません。");
			}

			LoadSymbolList(Directory.GetFiles(dirPath));
		}

		#endregion


		#region private helpers

		/// <summary>
		/// Reads a text file and appends each non-empty line to <paramref name="lines"/>.
		/// </summary>
		/// <param name="path">Path of the file to read.</param>
		/// <param name="lines">Receives the non-empty lines, including those read before a failure.</param>
		/// <returns>true when the whole file was read; false when it is missing or reading failed.</returns>
		private static bool TryReadNonEmptyLines(string path, List<string> lines)
		{
			if (!File.Exists(path))
			{
				return false;
			}

			try
			{
				// Opening happens inside the try so that an open failure (access denied,
				// file removed after the existence check, ...) is reported as false too.
				using (StreamReader reader = File.OpenText(path))
				{
					string line;
					while ((line = reader.ReadLine()) != null)
					{
						if (line.Length > 0)
						{
							lines.Add(line);
						}
					}
				}

				return true;
			}
			catch (Exception)
			{
				// Deliberately broad: the public Load* methods promise a bool result
				// instead of an exception for unreadable files.
				return false;
			}
		}

		#endregion



	}
}
