/*
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 * 
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.

 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
  <file> UlsLex.h </file>
  <brief>
  </brief>
  <author>
    Stanley Hong <link2next@gmail.com>, April 2017.
  </author>
*/
#pragma once

#include "UlsFactory.h"

using namespace System;

namespace uls {
	namespace polaris {
#define ULS_EXCLUDE_HFILES
#include "uls/uls_config.h"
#undef ULS_EXCLUDE_HFILES
		// <brief>
		// Options accepted by UlsLex::setInputOpt() and UlsLex::clearInputOpt().
		// The enumerators are numbered consecutively starting at 0.
		// NOTE(review): the meaning of each individual option is not visible in this
		//   header -- confirm against the implementation of UlsLex.
		// </brief>
		public enum class InputOpts {
			WantEOF = 0,
			DoDup = 1,
			MsMbcsEncoding = 2,
			Utf8Encoding = 3
		};

		ref class UlsIStream;

		// <brief>
		// Managed (C++/CLI) lexical-analyzer class of the namespace uls::polaris.
		// It declares the token-reading interface (getTok(), getCh(), peekCh(), ...),
		//   the input-stack interface (pushFile(), pushLine(), pushInput(), popInput(), ...)
		//   and the properties that describe the current token and input location.
		// </brief>
		public ref class UlsLex : UlsObject
		{
			// <brief>
			// Class-wide state and the static constructor.
			// 'objs_list' is the list handed to UlsObject::finalizeUlsObjectList()
			//   by releaseUlsObjectList().
			// NOTE(review): 'uf' is presumably the factory returned by getUlsFactory() -- confirm in the .cpp.
			// </brief>
			static UlsFactory ^uf;
			static UlsObjectListExp ^objs_list;
			static UlsLex();

			// Per-instance state.
			// NOTE(review): isFinalized/isDisposed presumably record whether the finalizer/destructor
			//   declared below have already run -- confirm in the .cpp.
			bool isFinalized, isDisposed;
			// Handle of the same type that getCore() returns.
			UlsFactory::uls_lex_t ^uls;
			// Dictionary keyed by an int with Object^ values.
			// NOTE(review): presumably the storage behind setExtraTokdef()/getExtraTokdef() -- confirm.
			Collections::Generic::Dictionary<int, Object^> ^Dic4ExtraTokdef;
			// The flags manipulated by setInputOpt(), clearInputOpt(), getInputOpts() and resetInputOpts().
			int input_flags;

			// <brief>
			// These flags affect the processing of the input stream which is set by pushInput().
			// 1. WantEOF: At the end of file, getTok() will give you the EOF-token.
			// NOTE(review): presumably converts the option 'fl' to its integer flag value -- confirm in the .cpp.
			// </brief>
			// <parm name="fl">The input option</parm>
			int get_uls_flags(InputOpts fl);

		protected:
			// NOTE(review): presumably the accessors behind the token-string properties (TokStr/lexeme)
			//   and the tag properties (Tag/FileName) -- confirm in the .cpp.
			String ^get_lexeme_lex();
			String ^get_tagstr_lex();

		public:
			// Class-wide integer values. They have no in-class initializer,
			//   so they are assigned outside this header.
			// NOTE(review): presumably set in the static constructor -- confirm.
			static int WANT_EOFTOK;
			static int FILE_UTF8;

			static int LINE_NUMBERING;
			static int STREAM_BIN_LE;
			static int STREAM_BIN_BE;
			static int NEXTCH_NONE;

			// UCH_NONE is the NUL character.
			// The LITPROC_* members are initialized from the ULS_LITPROC_* macros.
			// NOTE(review): presumably the result codes of a literal-string analyzer
			//   (see changeLiteralAnalyzer()) -- confirm.
			static const UInt32 UCH_NONE = '\0';
			static int LITPROC_ERROR = ULS_LITPROC_ERROR;
			static int LITPROC_DISMISSQUOTE = ULS_LITPROC_DISMISSQUOTE;
			static int LITPROC_ENDOFQUOTE = ULS_LITPROC_ENDOFQUOTE;

			// <brief>
			// Returns a handle to a UlsFactory object.
			// NOTE(review): presumably the class-wide factory 'uf' -- confirm in the .cpp.
			// </brief>
			// <return>UlsFactory handle</return>
			static UlsFactory ^getUlsFactory();

			// <brief>
			// NOTE(review): judging by the names only, these two methods report the locations
			//   searched for ulc files; dumpSearchPathOfUlc() does so for the configuration
			//   named 'confname' -- confirm in the .cpp.
			// </brief>
			// <parm name="confname">The name of a lexical configuration</parm>
			static void dumpSearchPathOfUlc(String ^confname);
			static void listUlcSearchPaths();

			// Token numbers of the reserved tokens.
			// As noted at getTok(), toknum_EOI is a dynamic number varied for each lexical configuration,
			//   so read it from these fields.
			int toknum_EOI, toknum_EOF, toknum_ERR;
			int toknum_NONE, toknum_ID, toknum_NUMBER, toknum_TMPL;

			// Detail record accepted by peekCh(GetchDetailInfo^) and getCh(GetchDetailInfo^).
			// NOTE(review): presumably filled in by those calls with the character, the token id
			//   and whether the cursor is at a literal string -- confirm in the .cpp.
			ref struct GetchDetailInfo {
				UInt32 uch;
				int tok_id;
				bool is_quote;
			};

			// <brief>
			// This is a constructor that creates an object for lexical analysis.
			// The 'ulc_fpath' is a name of language specification in the ulc system repository or
			//   simply a file path of ulc file.
			// To see the available ulc names, use the -q-option of the command 'ulc2class'.
			// </brief>
			// <parm name="ulc_fpath">The name/path of the lexical configuration.</parm>
			// <return>none</return>
			UlsLex(String ^ulc_fpath);

			// <brief>
			// The destructor (~UlsLex), the overridden finalizer() method
			//   and the C++/CLI finalizer (!UlsLex).
			// </brief>
			virtual ~UlsLex();
			virtual void finalizer() override;
			!UlsLex();

			// <brief>
			// This returns the current token number.
			// It can be directly read by dot-op as declared as public property.
			// </brief>
			// <return>token number</return>
			virtual property int TokNum {
				int get();
			}

			// NOTE(review): 'token' looks like a synonym of 'TokNum' -- confirm in the .cpp.
			property int token {
				int get();
			}

			// <brief>
			// This returns the current token string.
			// It can be directly read by dot-op as declared as public property.
			// </brief>
			virtual property String ^TokStr {
				String ^get();
			}

			// NOTE(review): 'lexeme' looks like a synonym of 'TokStr' -- confirm in the .cpp.
			property String ^lexeme {
				String ^get();
			}

			// <brief>
			// This member with the 'LineNum' stands for the processing location of the current input.
			// As might be expected, the pair <Tag,LineNum> is updated as the input file is consumed.
			// It can be directly read by dot-op because it's a public property.
			// 'FileName' is a synonym of 'Tag'.
			// </brief>
			property String ^Tag {
				String ^get();
				void set(String ^tagstr);
			}

			property String ^FileName {
				String ^get();
				void set(String ^tagstr);
			}

			// <brief>
			// This property is the location of the current token in input.
			// It can be read directly because it's declared as public member.
			// This can be used with 'FileName' described above.
			// </brief>
			property int LineNum {
				int get();
				void set(int linenum);
			}

			// <brief>
			// Informs user that the current token is toknum_EOI.
			// </brief>
			property bool isEOI {
				bool get();
			}

			// <brief>
			// Informs user that the current token is toknum_EOF.
			// </brief>
			property bool isEOF {
				bool get();
			}

			// <brief>
			// Informs user that the current token is toknum_ERR.
			// </brief>
			property bool isERR {
				bool get();
			}
		
			// <brief>
			// In case that the current token is NUMBER, this'll return the suffix of number if it exists.
			// The suffix of a number informs the compiler of its data type in a programming language.
			// </brief>
			// <return>suffix string</return>
			property String ^lexemeNumberSuffix {
				String ^get();
			}

			// <brief>
			// This property tells whether the lexeme of the current token is a floating-point number.
			// </brief>
			property bool isLexemeReal {
				bool get();
			}

			// <brief>
			// This property tells whether the lexeme of the current token is an integer.
			// </brief>
			property bool isLexemeInt {
				bool get();
			}

			// <brief>
			// This property tells whether the lexeme of the current token is zero.
			// </brief>
			property bool isLexemeZero {
				bool get();
			}

			// <brief>
			// This converts the current token string to primitive values.
			// lexemeAsInt32 will recognize the current token string as an 'int'.
			// It returns the value after converting the token string to the primitive type.
			// Make sure the current token is a number.
			// </brief>
			// <return>primitive data type 'int'</return>
			property Int32 lexemeAsInt32 {
				Int32 get();
			}

			// <brief>
			// This converts the current token string to primitive values.
			// lexemeAsUInt32 will recognize the current token string as an 'unsigned int'.
			// It returns the value after converting the token string to the primitive type.
			// Make sure the current token is a number.
			// </brief>
			// <return>primitive data type 'uint'</return>
			property UInt32 lexemeAsUInt32 {
				UInt32 get();
			}

			// <brief>
			// This converts the current token string to primitive values.
			// lexemeAsInt64 will recognize the current lexeme as a 'long'.
			// It returns the value after converting the token string to the primitive type.
			// Make sure the current token is a number.
			// </brief>
			// <return>primitive data type 'long'</return>
			property Int64 lexemeAsInt64 {
				Int64 get();
			}

			// <brief>
			// This converts the current token string to primitive values.
			// lexemeAsUInt64 will recognize the current lexeme as an 'unsigned long'.
			// It returns the value after converting the token string to the primitive type.
			// Make sure the current token is a number.
			// </brief>
			// <return>primitive data type 'ulong'</return>
			property UInt64 lexemeAsUInt64 {
				UInt64 get();
			}

			// <brief>
			// This converts the current token string to primitive values.
			// lexemeAsDouble will recognize the current lexeme as a 'double'.
			// It returns the value after converting the token string to the primitive type.
			// Make sure the current token is a number.
			// </brief>
			// <return>primitive data type 'double'</return>
			property Double lexemeAsDouble {
				Double get();
			}

			// <brief>
			// Moves the cursor of input-buffer up to the first nonblank character.
			// </brief>
			// <return>the character to which the cursor is pointing.</return>
			UInt32 skipBlanks();

			// <brief>
			// Deletes the literal-string analyzer starting with 'pfx'
			//   if it's defined in the object.
			// </brief>
			// <parm name="pfx">The prefix with which the quote type of the literal-string analyzer starts.</parm>
			// <return>none</return>
			void deleteLiteralAnalyzer(String ^pfx);

			// <brief>
			// Changes the literal-string analyzer to 'proc'.
			// The 'proc' will be applied to the quote type starting with 'pfx'.
			// </brief>
			// <parm name="pfx">The prefix of literal string that will be processed by 'proc'</parm>
			// <parm name="proc">The new literal-string analyzer</parm>
			// <parm name="data">NOTE(review): presumably user data handed to 'proc' -- confirm in the .cpp</parm>
			// <return>void</return>
			void changeLiteralAnalyzer(String ^pfx, UlsFactory::uls_litstr_analyzer_t proc, Object ^data);

			// <brief>
			// Returns the character at which the cursor of input-buffer is located.
			// If the cursor is pointing to a part of literal string, EOF, or EOI,
			//     the return value will be NEXTCH_NONE.
			// The overload without parameters takes no detail record.
			// </brief>
			// <parm name="uch_detail">NOTE(review): presumably receives the details of the peeked character -- confirm</parm>
			// <return>the character to which the cursor is pointing.</return>
			virtual UInt32 peekCh(GetchDetailInfo ^uch_detail);
			UInt32 peekCh();

			// <brief>
			// Returns the character at which the cursor of input-buffer is located.
			// This getCh() will advance the cursor to the next character.
			// But if the cursor comes across a literal string, EOF, or EOI,
			//    it'll just return NEXTCH_NONE without advancing the cursor.
			// The overload without parameters takes no detail record.
			// </brief>
			// <parm name="uch_detail">NOTE(review): presumably receives the details of the extracted character -- confirm</parm>
			// <return>the character to which the cursor is pointing.</return>
			virtual UInt32 getCh(GetchDetailInfo ^uch_detail);
			UInt32 getCh();

			// <brief>
			// Pushes the 'uch' into the input-buffer
			//    so that the next call of getCh, getTok will consider it in processing input stream.
			// </brief>
			// <parm name="uch">The character to push back</parm>
			virtual void ungetCh(UInt32 uch);

			// <brief>
			// Checks if 'tok_id' is a literal string or not.
			// </brief>
			// <parm name="tok_id">The number to examine</parm>
			// <return>true if the 'tok_id' belongs to the group of literal-string tokens.</return>
			bool isQuoteTok(int tok_id);

			// <brief>
			// Checks if the 'uch' is space character in the context of the current lexical object.
			// </brief>
			// <parm name="uch">The character to examine</parm>
			// <return>true/false</return>
			bool isSpace(UInt32 uch);

			// <brief>
			// Checks if the 'uch' is a member of first character group of identifier
			// in the context of the current lexical object.
			// </brief>
			// <parm name="uch">The character to examine</parm>
			// <return>true/false</return>
			bool isIdfirst(UInt32 uch);

			// <brief>
			// Checks if the 'uch' is a part of identifier
			//   in the context of the current lexical object.
			// </brief>
			// <parm name="uch">The character to examine</parm>
			// <return>true/false</return>
			bool isId(UInt32 uch);

			// <brief>
			// Checks if the 'uch' is a member of the first character group of literal-strings
			// in the context of the current lexical object.
			// </brief>
			// <parm name="uch">The character to examine</parm>
			// <return>true/false</return>
			bool isQuote(UInt32 uch);

			// <brief>
			// Checks if the 'uch' is a punctuation character
			// in the context of the current lexical object.
			// </brief>
			// <parm name="uch">The character to examine</parm>
			// <return>true/false</return>
			bool is1CharTok(UInt32 uch);

			// <brief>
			// Checks if the 'uch' is a part of lexeme that consists of
			//   two or more non-identifier-characters.
			// </brief>
			// <parm name="uch">The character to examine</parm>
			// <return>true/false</return>
			bool is2CharTok(UInt32 uch);

			// <brief>
			// This is one of the main methods of the uls lexical analyzer object.
			// It gets a token and its associated lexeme advancing the cursor of input.
			// At the end of input you will get a special token EOI.
			// Notice the EOI token number should be retrieved by the field 'toknum_EOI'
			//    as it's a dynamic number varied for each lexical configuration.
			// Even if it returns the token number, you can get it also from the object.
			// Retrieve the property 'TokNum' to get the current token number and
			//    the property 'TokStr' to get the associated lexeme.
			// </brief>
			// <return>the token number</return>
			virtual int getTok();

			// <brief>
			// An alias of getTok().
			// </brief>
			// <return>token id</return>
			int getToken();

			// <brief>
			// An alias of getTok().
			// </brief>
			// <return>token id</return>
			int next();

			// <brief>
			// This method 'expect's the current token to be 'TokExpected'.
			// If not, the current execution thread will be terminated abnormally.
			// </brief>
			// <parm name="TokExpected">The expected token number</parm>
			void expect(int TokExpected);

			// <brief>
			// NOTE(review): presumably sets the current token number to 't'
			//   and the current lexeme to 'lxm' -- confirm in the .cpp.
			// </brief>
			// <parm name="t">A token number</parm>
			// <parm name="lxm">A lexeme string</parm>
			// <return>none</return>
			void setTok(int t, String ^lxm);

			// <brief>
			// Pushes the current token into the buffer
			//   so that the next call will again return the token.
			// </brief>
			virtual void ungetTok();

			// <brief>
			// Pushes the 'str' into the buffer
			//   so that the next call will consider it in processing the input stream.
			// </brief>
			// <parm name="str">The string to push back</parm>
			virtual void ungetStr(String ^str);

			// <brief>
			// Pushes the 'tok_id' and its lexeme 'lxm' into the buffer
			//   so that the next call will consider it in processing the input stream.
			// </brief>
			// <parm name="lxm">The lexeme to push back</parm>
			// <parm name="tok_id">The token number to push back</parm>
			virtual void ungetLexeme(String ^lxm, int tok_id);

			// <brief>
			// This dumps the current token as an explanatory string, which is composed of the string 'pfx',
			// 'suff', and the basic description of the token.
			// The basic description of token consists of <TokenName,TokenString>.
			// For example, if pfx is '\t' and suff is '\n',
			//    the output line to the terminal may be like as follows.
			//  '\t' [___ID] main '\n'
			// The dumpTok() without parameters will prepend '\t' and append '\n' to the string.
			// </brief>
			// <parm name="pfx">The 'pfx' is prepended at the front of the basic description of token.</parm>
			// <parm name="suff">The 'suff' is appended at the end of the basic description of token.</parm>
			void dumpTok(String ^pfx, String ^suff);

			// <brief>
			// The dumpTok() without parameters will prepend '\t' and append '\n' to the string.
			// NOTE(review): the four-parameter overload presumably takes the token name 'idstr' and
			//   the token string 'tstr' from the caller instead of the current token -- confirm in the .cpp.
			// </brief>
			void dumpTok();
			void dumpTok(String ^pfx, String ^idstr, String ^tstr, String ^suff);

			// <brief>
			// Returns the keyword string corresponding to the token number 't'.
			// </brief>
			// <parm name="t">A token number</parm>
			// <return>keyword string</return>
			String ^keyword(int t);

			// <brief>
			// Returns the keyword string corresponding to the current token number.
			// </brief>
			// <return>keyword string</return>
			String ^keyword();

			// <brief>
			// Returns the name string corresponding to the token number 't'.
			// The name is defined in the ulc-file.
			// NOTE(review): the overload without parameters presumably uses the current
			//   token number, like keyword() -- confirm in the .cpp.
			// </brief>
			// <parm name="t">A token number</parm>
			// <return>The token name string defined in the ulc-file</return>
			String ^nameOf(int t);
			String ^nameOf();

			// <brief>
			// The field 'LineNum' is automatically updated by calling getTok() but
			//  if you want to change it forcibly use this method.
			// </brief>
			// <parm name="lineno">The new value of 'LineNum' to be updated</parm>
			void setLineNum(int lineno);

			// <brief>
			// Use this method to add some lines to the current line number 'LineNum' forcibly.
			// If the resultant line number is negative the 'LineNum' won't be updated.
			// </brief>
			// <parm name="amount">The amount of lines to be added. It may be negative</parm>
			void addLineNum(int amount);

			// <brief>
			// This will update the value 'FileName' with the parameter 'fname'.
			// </brief>
			// <parm name="fname">The new string value to be updated</parm>
			// <return>none</return>
			void setFileName(String ^fname);

			// <brief>
			// This is to manipulate the flags 'input_flags' defined above.
			// setInputOpt() sets the flag 'fl' in the internal flags 'input_flags'.
			// </brief>
			void setInputOpt(InputOpts fl);

			// <brief>
			// This is to manipulate the flags 'input_flags' defined above.
			// clearInputOpt() clears the flag 'fl' in the internal flags 'input_flags'.
			// </brief>
			void clearInputOpt(InputOpts fl);

			// <brief>
			// This is to manipulate the flags 'input_flags' defined above.
			// getInputOpts() will just return the internal flags 'input_flags'.
			// </brief>
			int getInputOpts();

			// <brief>
			// This is to manipulate the flags 'input_flags' defined above.
			// resetInputOpts() will clear the internal 'input_flags'.
			// </brief>
			void resetInputOpts();

			// <brief>
			// Pushes the content of file onto the current input.
			// The file content will be the top of stack of input-buffers.
			// When the cursor reaches the end-of-file of 'filepath', the input buffer will be popped and
			//   the cursor will return to the previous location of input-buffer.
			// NOTE(review): 'want_eof' presumably selects whether the EOF-token is delivered at the end
			//   of the file, and setFile() presumably replaces the current input instead of
			//   pushing onto it -- confirm both in the .cpp.
			// </brief>
			// <parm name="filepath">A new input that you want to tokenize</parm>
			bool pushFile(String ^filepath);
			void pushFile(String ^filepath, bool want_eof);
			void setFile(String ^filepath, bool want_eof);

			// <brief>
			// Pushes the parameter 'line' onto the current input.
			// The input buffer of 'line' will be the top of stack of input-buffers.
			// You should call popInput() or getTok() to return the previous input if the current token is EOF.
			// NOTE(review): 'want_eof' presumably selects whether the EOF-token is delivered at the end
			//   of the line, and setLine() presumably replaces the current input instead of
			//   pushing onto it -- confirm both in the .cpp.
			// </brief>
			// <parm name="line">A new input that you want to tokenize</parm>
			void pushInput(String ^line);
			void pushLine(String ^line, bool want_eof);
			void setLine(String ^line, bool want_eof);

			// <brief>
			// This method will push an input stream 'in_str' on the top of the input stack.
			// Then the getTok() method can be used to get the tokens from the input.
			// </brief>
			// <parm name="in_str">input stream object!
			//   You can create the input object from text file, uls-file, or literal-string.
			// </parm>
			// <return>true/false</return>
			bool pushInput(UlsIStream ^in_str);

			// <brief>
			// popInput() dismisses the current input source.
			// </brief>
			// <return>none</return>
			void popInput();

			// <brief>
			// popAllInputs() dismisses all the input sources and goes back to the initial state.
			// In the initial state you will get the EOI as current token.
			// </brief>
			void popAllInputs();

			// <brief>
			// This sets extra token definition 'o' as opaque data which should be provided by user.
			// The stored data of token number 't' can be later retrieved by getExtraTokdef().
			// Refer to the example in tests/dump_toks.
			// </brief>
			// <parm name="t">The target token number with which user data is associated</parm>
			// <parm name="o">The extra tokdef provided by user</parm>
			void setExtraTokdef(int t, Object ^o);

			// <brief>
			// Use this method to get the user-defined token information stored previously by setExtraTokdef().
			// </brief>
			// <parm name="t">The target token id of the data that you want to retrieve.</parm>
			// <return>The opaque data given by user</return>
			Object ^getExtraTokdef(int t);

			// <brief>
			// Returns the user-defined token data of the current token.
			// If you didn't set it with setExtraTokdef(), it'll return null.
			// </brief>
			// <return>The object which is associated with the current token</return>
			Object ^getExtraTokdef();

			// <brief>
			// This returns the user-defined data associated with the current token.
			// It's equivalent to 'getExtraTokdef(TokNum)'.
			// </brief>
			property Object ^ExtraTokdef {
				Object ^get();
			}

			// <brief>
			// Returns a handle of type UlsFactory::uls_lex_t.
			// NOTE(review): presumably the private member 'uls' -- confirm in the .cpp.
			// </brief>
			UlsFactory::uls_lex_t ^getCore();

			// <brief>
			// Passes the class-wide list 'objs_list' to UlsObject::finalizeUlsObjectList()
			//   and then resets the handle 'objs_list' to nullptr.
			// </brief>
			static void releaseUlsObjectList()
			{
				UlsObject::finalizeUlsObjectList(objs_list);
				// Drop the reference only after the list has been finalized.
				objs_list = nullptr;
			}
		};
	}
}
