//////////////////////////////////////////////////////////////////////////////////////
// SRegex.h
// Implementation of the SRegex class (NFA-based regular-expression matcher)
//////////////////////////////////////////////////////////////////////////////////////

#if !defined( SREGEX_H_INCLUDED_ )
#define SREGEX_H_INCLUDED_

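/*
Pattern syntax summary (reconstructed from the original, mis-encoded Japanese
notes; the tokens themselves are recognised by RegTxtPtr, so confirm the
details against RegTxtPtr.h):

() : grouping (changes precedence)
(^) : grouping (changes precedence) that is also recorded for back-reference
| : or
. : any single character
[] : character class
[^] : negated character class
{} : zero or more repetitions (shortest match)
{^} : zero or more repetitions (longest match)
\ : escape
\xnnnn : the character 0xnnnn (each n is 0-f, four digits)
\znnnn : back-reference to group number 0xnnnn
*/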

#include <assert.h>
#include <vector>

#include "RegTxtPtr.h"
#include "RegTNode.h"
#include "InstanceMgr.h"

namespace N_SRegex {

// Error codes stored by SRegex and returned from SRegex::GetLastError().
// The pattern-compilation code throws these values as exceptions;
// SRegex::Initialize() catches them and records the code.
enum SREGEX_ERR {
	SRE_NON					= 0x00000000,	// No error
	SRE_EOF_NOT_ANTICIPATED	= 0xF0000001,	// Unexpected end of pattern (EOF); not raised in this file
	SRE_OUT_OF_MEMORY		= 0xF0000002,	// Out of memory; not raised in this file
	SRE_UNCERTAIN_CHAR		= 0xF0000003,	// Pattern element of a type the builder does not handle
	SRE_ZERO_LENGTH_PTN		= 0xF0000004,	// Zero-length (empty) pattern or alternative
	SRE_UNDEFINED_BACKWARD	= 0xF0000005,	// Back-reference to a group that is not defined (yet)
	SRE_UNEXPENTED			= 0xF0000006	// Any other (unexpected) failure; the misspelling is part of the public name
};

// Default function object used to compare two characters.
//
// operator() is virtual so that a derived comparator can customise the
// comparison (e.g. case-insensitive matching).  Because the class is meant to
// be derived from, it also declares a virtual destructor: without one,
// destroying a derived comparator through a DefaultCompare pointer would be
// undefined behaviour.
template < typename T_Char >
class DefaultCompare
{
public:
	DefaultCompare() {}
	virtual ~DefaultCompare() {}

	// Returns true when r1 and r2 compare equal with T_Char's operator==.
	virtual bool operator()( const T_Char &r1, const T_Char &r2 ) const
	{
		return r1 == r2;
	}
};

template < typename T_Char, typename T_Ptn, typename T_Text, typename T_Compare = DefaultCompare< T_Char > >
class SRegex
{
	// Pseudo-pointer type used to walk the pattern; it classifies each
	// pattern element (see GetCharType()) as it advances.
	typedef N_SRegTxtPtr::RegTxtPtr< T_Char, T_Ptn >	T_PtnPtr;

	// Node types that make up the NFA.  T_NODE (the epsilon node) is the
	// common type through which all nodes are linked.
	typedef N_SRegexNode::EpsilonNode< T_Char, T_PtnPtr, T_Text >		T_NODE;
	typedef N_SRegexNode::StringNode< T_Char, T_PtnPtr, T_Text, T_Compare >			T_NODE_STR;
	typedef N_SRegexNode::ArbitCharNode< T_Char, T_PtnPtr, T_Text >		T_NODE_OC;
	typedef N_SRegexNode::TermNode< T_Char, T_PtnPtr, T_Text >			T_NODE_TERM;
	typedef N_SRegexNode::LazyLoopNode< T_Char, T_PtnPtr, T_Text >		T_NODE_LAZY;
	typedef N_SRegexNode::GreedyLoopNode< T_Char, T_PtnPtr, T_Text >	T_NODE_GREEDY;
	typedef N_SRegexNode::CharClassNode< T_Char, T_PtnPtr, T_Text, T_Compare >		T_NODE_CC;
	typedef N_SRegexNode::UnCharClassNode< T_Char, T_PtnPtr, T_Text, T_Compare >	T_NODE_UNCC;
	typedef N_SRegexNode::BackwardInfoNode< T_Char, T_PtnPtr, T_Text >	T_NODE_BWINFO;
	typedef N_SRegexNode::BackwardRefNode< T_Char, T_PtnPtr, T_Text >	T_NODE_BWREF;

	// Classification of a pattern element, as reported by T_PtnPtr::GetCharType().
	typedef N_SRegTxtPtr::CHARTYPE	T_CHARTYPE;

public:
	// Constructs a matcher with no compiled pattern.  Initialize() must
	// succeed before FindMatchStr() or Match() may be used.
	//
	// Initializers are listed in member-declaration order (the order in which
	// they actually run), and pErrorPos is value-initialized so that
	// GetErrorPos() never reads an indeterminate value.
	//
	// NOTE(review): the compiled nodes are handed the addresses of this
	// object's CompareChar and IsInterrupted members, so copying an SRegex
	// that holds a compiled pattern looks unsafe - confirm before copying.
	SRegex() :
		pNFA( NULL ),
		IsMatched( false ),
		LastError( SRE_NON ),
		pErrorPos(),
		IsInterrupted( false ),
		CompareChar()
	{}

	// No explicit cleanup is performed here; presumably m_InsMgr releases the
	// nodes it created - TODO confirm in InstanceMgr.h.
	~SRegex() {}

	// Compiles `pattern` into an NFA, discarding any previously compiled
	// pattern and all state derived from it.
	//
	// pattern : the regular-expression pattern to compile.
	// Returns : true when the NFA was built; false otherwise.  After a false
	//           return GetLastError() holds the error code and GetErrorPos()
	//           the pattern position reached when the error was raised.
	bool Initialize( T_Ptn pattern )
	{
		T_PtnPtr PatternPtr( pattern );

		// Reset everything left over from a previous pattern.
		LastError = SRE_NON;
		pErrorPos = pattern;
		pNFA = NULL;
		IsMatched = false;
		IsInterrupted = false;
		m_InsMgr.clear();
		m_InsMgr.SetInterruptedFlgAdr( &IsInterrupted );
		vBackwardInfo.clear();

		try {
			// Build the NFA.
			pNFA = CreateNFA( &PatternPtr );
		}
		catch( SREGEX_ERR e ) {
			// Record the error code and where in the pattern it occurred.
			// pNFA is still NULL because the assignment above never ran.
			LastError = e;
			pErrorPos = PatternPtr.GetPtr();
		}

		// On failure, drop the partially built nodes.
		if ( NULL == pNFA ) {
			// A builder reported failure without throwing: make sure the
			// caller never sees "failed" together with "no error".
			if ( SRE_NON == LastError ) LastError = SRE_UNEXPENTED;
			m_InsMgr.clear();
			vBackwardInfo.clear();
			return false;
		}
		return true;
	}

	// Searches `pText` for the first position at which the pattern matches.
	//
	// Start positions are tried from left to right, one character at a time,
	// until the terminating T_Char('\0') is reached; the end-of-text position
	// itself is never tried.
	//
	// pText  : text to search, terminated by T_Char('\0').
	// ppSPos : if non-NULL, receives the start position of the match.
	// ppEPos : if non-NULL, receives the end position reported by the NFA.
	// Returns: true when a match was found; false when the text was exhausted,
	//          the search was interrupted, or no pattern has been compiled.
	//
	// The interrupt flag is cleared on entry, so an Interrupt() issued before
	// this call starts has no effect on it.
	bool FindMatchStr( T_Text pText, T_Text *ppSPos, T_Text *ppEPos )
	{
		assert( NULL != pNFA );

		IsMatched = false;
		IsInterrupted = false;

		// The assert disappears in release builds; without a compiled
		// pattern there is nothing that could match.
		if ( NULL == pNFA ) return false;

		T_Text wEPos;

		// Try a match at each successive start position.
		while ( !( T_Char( '\0' ) == (*pText) ) && !IsInterrupted ) {
			if ( pNFA->Match( pText, &wEPos ) ) {
				// Matched: report the range to the caller.
				if ( ppSPos ) (*ppSPos) = pText;
				if ( ppEPos ) (*ppEPos) = wEPos;
				IsMatched = true;
				return true;
			}
			++pText;	// advance one character
		}
		return false;	// no start position matched
	}

	// Tests whether the pattern matches starting exactly at `pText`
	// (no searching of later start positions).
	//
	// pText  : position in the text at which the match must start.
	// ppEPos : must be non-NULL; passed to the NFA, which reports the end
	//          position of the match through it.
	// Returns: true when the pattern matched; false otherwise, including
	//          when no pattern has been compiled.
	bool Match( T_Text pText, T_Text *ppEPos )
	{
		assert( NULL != pNFA && NULL != ppEPos );

		IsInterrupted = false;

		// The assert disappears in release builds; without a compiled
		// pattern there is nothing that could match.
		if ( NULL == pNFA ) {
			IsMatched = false;
			return false;
		}

		IsMatched = pNFA->Match( pText, ppEPos );
		return IsMatched;
	}

	// Requests that the matching currently in progress be abandoned.
	//
	// Only raises the IsInterrupted flag.  FindMatchStr() checks the flag
	// between start positions; the flag's address is also handed to m_InsMgr
	// in Initialize(), presumably so the nodes can poll it - TODO confirm.
	void Interrupt()
	{
		IsInterrupted = true;
	}

	// Ȥξ
	void GetBackwardInfo( std::vector< std::pair< T_Text, T_Text > > *pVec ) const
	{
		// ޥåƤʤСϼǤʤ
		if ( !IsMatched ) return ;

		for ( int i = 0; i < vBackwardInfo.size(); i++ ) {
			T_Text s = vBackwardInfo[i]->GetMatchPos();
			T_Text e = vBackwardInfo[i]->GetRightPos();
			pVec->push_back( std::pair< T_Text, T_Text >( s, e ) );
		}
	};

	// Returns the error code recorded by the most recent Initialize() call
	// (SRE_NON when that call raised no error).
	SREGEX_ERR GetLastError() const
	{
		// No "this != NULL" assert: `this` can never be null in a valid
		// member call, so the check was always true.
		return LastError;
	}

	// Returns the pattern position associated with the last Initialize() call:
	// the position reached when an error was raised, or the start of the
	// pattern when no error was raised.  Not meaningful before Initialize()
	// has been called.
	T_Ptn GetErrorPos() const
	{
		// No "this != NULL" assert: `this` can never be null in a valid
		// member call, so the check was always true.
		return pErrorPos;
	}

protected:

	// Builds the complete NFA for the regular expression read through `pPtr`.
	//
	// pPtr   : pattern cursor; consumed up to the element of type RTP_NULL.
	// Returns: the entry node of the NFA, or NULL if CreateSubNode() reported
	//          failure by returning false.
	// Throws : whatever SREGEX_ERR value the sub-builders throw.
	T_NODE* CreateNFA( T_PtnPtr *pPtr )
	{
		assert( NULL != pPtr );

		// Create the terminal (accepting) node first.
		T_NODE_TERM *pTerm = CreateNode< T_NODE_TERM >( &m_InsMgr );

		// Build the body of the NFA from the whole pattern.
		T_NODE *pFirst = NULL;
		T_NODE *pLast = NULL;
		if ( !CreateSubNode( pPtr, N_SRegTxtPtr::RTP_NULL, &pFirst, &pLast ) )
			return NULL;

		// Link the exit of the body to the terminal node.
		pLast->AddNextNode( pTerm );

		return pFirst;
	}

	// Builds the sub-graph for one nesting level of the pattern.
	//
	// Pattern elements are consumed until the current element has the type
	// EndCharType; that terminating element itself is left unconsumed.  The
	// level is bracketed by two freshly created T_NODE nodes: every
	// alternative (separated by RTP_OR elements) starts at *pFirst and ends
	// at *pLast.
	//
	// pPtn        : pattern cursor, advanced past everything consumed.
	// EndCharType : element type that terminates this level.
	// pFirst      : receives the entry node of the sub-graph.
	// pLast       : receives the exit node of the sub-graph.
	// Returns     : true on success; false if a nested builder returned false.
	// Throws      : SRE_ZERO_LENGTH_PTN for an empty level or empty alternative,
	//               SRE_UNCERTAIN_CHAR for an element type not handled here,
	//               SRE_UNEXPENTED if a nested builder produced no nodes.
	bool CreateSubNode( T_PtnPtr *pPtn, T_CHARTYPE EndCharType, T_NODE **pFirst, T_NODE **pLast )
	{
		assert( NULL != pPtn && NULL != pFirst && NULL != pLast );

		// Create the entry and exit nodes of this level.
		(*pFirst) = CreateNode< T_NODE >( &m_InsMgr );
		(*pLast) = CreateNode< T_NODE >( &m_InsMgr );

		// Tail of the alternative currently being built.
		T_NODE *pWork = (*pFirst);

		while ( pPtn->GetCharType() != EndCharType ) {
			// Entry and exit of the fragment built for the current element.
			T_NODE *wpFirst = NULL;
			T_NODE *wpLast = NULL;

			switch ( pPtn->GetCharType() ) {
			case N_SRegTxtPtr::RTP_OR:
				// The separator is at the very start of the level, directly
				// after an opening element, or follows another separator.
				if ( pWork == (*pFirst) ) throw SRE_ZERO_LENGTH_PTN;

				// Close the current alternative and start a new one, so that
				// several chains run in parallel from *pFirst to *pLast.
				pWork->AddNextNode( (*pLast) );
				pWork = (*pFirst);
				++(*pPtn);	// step over the separator
				continue;

			case N_SRegTxtPtr::RTP_NORMAL:
				// Literal string.
				if ( !CreateSubNode_STRING( pPtn, &wpFirst, &wpLast ) )
					return false;
				break;

			case N_SRegTxtPtr::RTP_PIERIOD:
				// Node that matches an arbitrary character.
				wpFirst = CreateNode< T_NODE_OC >( &m_InsMgr );
				wpLast = wpFirst;
				++(*pPtn);	// step over the element
				break;

			case N_SRegTxtPtr::RTP_H_GRP:
				// Plain group.
				if ( !CreateSubNode_KAKKO( pPtn, &wpFirst, &wpLast, N_SRegTxtPtr::RTP_M_KAKKO ) )
					return false;
				break;

			case N_SRegTxtPtr::RTP_H_ZGRP:
				// Group recorded for back-reference.
				if ( !CreateSubNode_ZKAKKO( pPtn, &wpFirst, &wpLast ) )
					return false;
				break;

			case N_SRegTxtPtr::RTP_H_MAX:
				// Greedy repetition.
				if ( !CreateSubNode_MAX( pPtn, &wpFirst, &wpLast ) )
					return false;
				break;

			case N_SRegTxtPtr::RTP_H_MIN:
				// Lazy repetition.
				if ( !CreateSubNode_MIN( pPtn, &wpFirst, &wpLast ) )
					return false;
				break;

			case N_SRegTxtPtr::RTP_H_CC:
				// Character class.
				if ( !CreateSubNode_CC( pPtn, &wpFirst, &wpLast ) )
					return false;
				break;

			case N_SRegTxtPtr::RTP_H_UNCC:
				// Negated character class.
				if ( !CreateSubNode_UNCC( pPtn, &wpFirst, &wpLast ) )
					return false;
				break;

			case N_SRegTxtPtr::RTP_ZID:
				// Back-reference.
				if ( !CreateSubNode_ZID( pPtn, &wpFirst, &wpLast ) )
					return false;
				break;

			default:
				// Includes reaching an unexpected closing element or the end
				// of the pattern before EndCharType was seen.
				throw SRE_UNCERTAIN_CHAR;
			}
			if ( NULL == wpFirst || NULL == wpLast )
				throw SRE_UNEXPENTED;

			// Append the fragment to the current alternative.
			pWork->AddNextNode( wpFirst );
			pWork = wpLast;
		}

		// The level contains no pattern at all, or it ends with a separator.
		if ( pWork == (*pFirst) ) throw SRE_ZERO_LENGTH_PTN;

		// Connect the last alternative to the exit node.
		pWork->AddNextNode( (*pLast) );
		return true;
	}

	// Builds the sub-graph for a bracketed construct: skips the opening
	// element, builds the enclosed level up to an element of type
	// EndCharType, then skips that closing element.
	//
	// pPtn        : pattern cursor, positioned on the opening element.
	// pFirst      : receives the entry node of the enclosed sub-graph.
	// pLast       : receives the exit node of the enclosed sub-graph.
	// EndCharType : element type that closes the construct.
	// Returns     : true on success; false if CreateSubNode() returned false.
	bool CreateSubNode_KAKKO( T_PtnPtr *pPtn, T_NODE **pFirst, T_NODE **pLast, T_CHARTYPE EndCharType )
	{
		assert( NULL != pPtn && NULL != pFirst && NULL != pLast );

		++(*pPtn);	// step over the opening element
		if ( !CreateSubNode( pPtn, EndCharType, pFirst, pLast ) )
			return false;
		++(*pPtn);	// step over the closing element
		return true;
	}

	// Builds the sub-graph for a group that is recorded for back-reference.
	//
	// The enclosed level is wrapped between two BackwardInfo nodes, one on
	// each side.  The left-hand node is registered in vBackwardInfo BEFORE the
	// enclosed level is parsed, so an enclosing group always gets a lower
	// index than the groups nested inside it.
	//
	// pPtn   : pattern cursor, positioned on the opening element.
	// pFirst : receives the left-hand BackwardInfo node.
	// pLast  : receives the right-hand BackwardInfo node.
	// Returns: true on success; false if CreateSubNode() returned false.
	bool CreateSubNode_ZKAKKO( T_PtnPtr *pPtn, T_NODE **pFirst, T_NODE **pLast )
	{
		assert( NULL != pPtn && NULL != pFirst && NULL != pLast );

		// Entry and exit of the enclosed level (initialised so that no path
		// can ever read an indeterminate pointer).
		T_NODE *wpFirst = NULL;
		T_NODE *wpLast = NULL;

		// Create the pair of nodes that hold the back-reference information.
		T_NODE_BWINFO *pBI_L = CreateNode< T_NODE_BWINFO >( &m_InsMgr );	// left side
		T_NODE_BWINFO *pBI_R = CreateNode< T_NODE_BWINFO >( &m_InsMgr );	// right side
		pBI_L->SetRightNode( pBI_R );	// tell the left node which node closes the group
		vBackwardInfo.push_back( pBI_L );	// register the group under the next index

		++(*pPtn);	// step over the opening element
		if ( !CreateSubNode( pPtn, N_SRegTxtPtr::RTP_M_KAKKO, &wpFirst, &wpLast ) )
			return false;
		++(*pPtn);	// step over the closing element

		// Put the left-hand node in front of the enclosed level.
		pBI_L->AddNextNode( wpFirst );
		(*pFirst) = pBI_L;

		// Put the right-hand node behind the enclosed level.
		wpLast->AddNextNode( pBI_R );
		(*pLast) = pBI_R;

		return true;
	}

	// Builds a greedy-loop node for a repetition opened by an RTP_H_MAX
	// element and closed by an RTP_M_MAX element.
	//
	// The enclosed level is not linked into the caller's chain directly; it
	// is handed to the GreedyLoop node, which becomes both the entry and the
	// exit of the fragment.
	//
	// pPtn   : pattern cursor, positioned on the opening element.
	// pFirst : receives the loop node.
	// pLast  : receives the same loop node.
	// Returns: true on success; false if the enclosed level could not be built
	//          (in which case *pFirst and *pLast are left untouched).
	bool CreateSubNode_MAX( T_PtnPtr *pPtn, T_NODE **pFirst, T_NODE **pLast )
	{
		assert( NULL != pPtn && NULL != pFirst && NULL != pLast );

		// The loop node is created before its body is parsed.
		T_NODE_GREEDY *pLoop = CreateNode< T_NODE_GREEDY >( &m_InsMgr );

		// Parse the repeated body.
		T_NODE *pBodyHead = NULL;
		T_NODE *pBodyTail = NULL;
		const bool bodyBuilt =
			CreateSubNode_KAKKO( pPtn, &pBodyHead, &pBodyTail, N_SRegTxtPtr::RTP_M_MAX );

		if ( bodyBuilt ) {
			// Give the body to the loop node and publish the loop node.
			pLoop->SetLoopDest( pBodyHead, pBodyTail, &m_InsMgr );
			(*pFirst) = pLoop;
			(*pLast) = pLoop;
		}
		return bodyBuilt;
	}

	// Builds a lazy-loop node for a repetition opened by an RTP_H_MIN element
	// and closed by an RTP_M_MIN element.
	//
	// The enclosed level is handed to the LazyLoop node, which becomes both
	// the entry and the exit of the fragment.
	//
	// pPtn   : pattern cursor, positioned on the opening element.
	// pFirst : receives the loop node.
	// pLast  : receives the same loop node.
	// Returns: true on success; false if the enclosed level could not be built
	//          (in which case *pFirst and *pLast are left untouched).
	bool CreateSubNode_MIN( T_PtnPtr *pPtn, T_NODE **pFirst, T_NODE **pLast )
	{
		assert( NULL != pPtn && NULL != pFirst && NULL != pLast );

		// The loop node is created before its body is parsed.
		T_NODE_LAZY *pLoop = CreateNode< T_NODE_LAZY >( &m_InsMgr );

		// Parse the repeated body.
		T_NODE *pBodyHead = NULL;
		T_NODE *pBodyTail = NULL;
		const bool bodyBuilt =
			CreateSubNode_KAKKO( pPtn, &pBodyHead, &pBodyTail, N_SRegTxtPtr::RTP_M_MIN );

		if ( bodyBuilt ) {
			// Give the body to the loop node and publish the loop node.
			pLoop->SetLoopDest( pBodyHead, pBodyTail, &m_InsMgr );
			(*pFirst) = pLoop;
			(*pLast) = pLoop;
		}
		return bodyBuilt;
	}

	// Builds a node that matches a literal string.
	//
	// The run of consecutive RTP_NORMAL elements at the cursor is copied into
	// a single string node, which is both the entry and the exit of the
	// fragment.
	//
	// pPtn   : pattern cursor, advanced past the run.
	// pFirst : receives the string node.
	// pLast  : receives the same string node.
	// Returns: always true.
	bool CreateSubNode_STRING( T_PtnPtr *pPtn, T_NODE **pFirst, T_NODE **pLast )
	{
		assert( NULL != pPtn && NULL != pFirst && NULL != pLast );

		T_NODE_STR *pStrNode = CreateNode< T_NODE_STR >( &m_InsMgr );

		// Fill in the characters, then attach the comparator.
		SetStringToNode( pPtn, pFirst, pLast, pStrNode );
		pStrNode->SetCompareObj( &CompareChar );

		(*pLast) = pStrNode;
		(*pFirst) = pStrNode;
		return true;
	}

	// Builds a character-class node.
	//
	// The RTP_NORMAL elements between the opening and closing elements become
	// the members of the class.
	//
	// pPtn   : pattern cursor, positioned on the opening element.
	// pFirst : receives the class node.
	// pLast  : receives the same class node.
	// Returns: always true.
	//
	// NOTE(review): the element after the member run is skipped without
	// checking that it really is the closing element - confirm that
	// RegTxtPtr guarantees this.
	bool CreateSubNode_CC( T_PtnPtr *pPtn, T_NODE **pFirst, T_NODE **pLast )
	{
		assert( NULL != pPtn && NULL != pFirst && NULL != pLast );

		// Handled through the string-node interface from here on.
		T_NODE_STR *pClassNode = CreateNode< T_NODE_CC >( &m_InsMgr );

		++(*pPtn);	// step over the opening element
		SetStringToNode( pPtn, pFirst, pLast, pClassNode );
		++(*pPtn);	// step over the closing element

		pClassNode->SetCompareObj( &CompareChar );

		(*pLast) = pClassNode;
		(*pFirst) = pClassNode;
		return true;
	}

	// Builds a negated character-class node.
	//
	// The RTP_NORMAL elements between the opening and closing elements become
	// the members of the (negated) class.
	//
	// pPtn   : pattern cursor, positioned on the opening element.
	// pFirst : receives the class node.
	// pLast  : receives the same class node.
	// Returns: always true.
	//
	// NOTE(review): the element after the member run is skipped without
	// checking that it really is the closing element - confirm that
	// RegTxtPtr guarantees this.
	bool CreateSubNode_UNCC( T_PtnPtr *pPtn, T_NODE **pFirst, T_NODE **pLast )
	{
		assert( NULL != pPtn && NULL != pFirst && NULL != pLast );

		// Handled through the string-node interface from here on.
		T_NODE_STR *pClassNode = CreateNode< T_NODE_UNCC >( &m_InsMgr );

		++(*pPtn);	// step over the opening element
		SetStringToNode( pPtn, pFirst, pLast, pClassNode );
		++(*pPtn);	// step over the closing element

		pClassNode->SetCompareObj( &CompareChar );

		(*pLast) = pClassNode;
		(*pFirst) = pClassNode;
		return true;
	}

	// Builds a node that refers back to a previously registered group.
	//
	// pPtn   : pattern cursor, positioned on the back-reference element.
	// pFirst : receives the back-reference node.
	// pLast  : receives the same back-reference node.
	// Returns: always true.
	// Throws : SRE_UNDEFINED_BACKWARD when the referenced index does not name
	//          a group registered so far.
	bool CreateSubNode_ZID( T_PtnPtr *pPtn, T_NODE **pFirst, T_NODE **pLast )
	{
		assert( NULL != pPtn && NULL != pFirst && NULL != pLast );

		// Read the index while the cursor is still on the element.
		unsigned short groupIdx = pPtn->GetBackwardIdx();

		// Only groups that are already registered may be referenced.
		if ( !( groupIdx < vBackwardInfo.size() ) )
			throw SRE_UNDEFINED_BACKWARD;

		T_NODE_BWREF *pRefNode = CreateNode< T_NODE_BWREF >( &m_InsMgr );
		++(*pPtn);	// step over the back-reference element
		pRefNode->SetpBackwardInfoNode( vBackwardInfo[ groupIdx ] );

		(*pLast) = pRefNode;
		(*pFirst) = pRefNode;
		return true;
	}

	// Copies the run of consecutive RTP_NORMAL elements at the cursor into
	// `pNode`, advancing the cursor past the run.
	//
	// pPtn   : pattern cursor.
	// pFirst : not used here beyond the non-NULL check.
	// pLast  : not used here beyond the non-NULL check.
	// pNode  : node that receives the characters.
	void SetStringToNode( T_PtnPtr *pPtn, T_NODE **pFirst, T_NODE **pLast, T_NODE_STR *pNode )
	{
		assert( NULL != pPtn && NULL != pFirst && NULL != pLast && NULL != pNode );

		// Stop at the first element that is not an ordinary character.
		for ( ; pPtn->GetCharType() == N_SRegTxtPtr::RTP_NORMAL; ++(*pPtn) ) {
			pNode->PushBackChar( (**pPtn) );
		}
	}

protected:
	// Instance manager passed to every CreateNode() call; cleared whenever a
	// pattern is (re)compiled or compilation fails.
	InstanceMgr< T_NODE > m_InsMgr;
	T_NODE *pNFA;	// Entry node of the compiled NFA; NULL when no pattern is compiled
	std::vector< const T_NODE_BWINFO* > vBackwardInfo;	// Left-hand info node of each back-reference group, in registration order
	bool IsMatched;	// Result of the most recent FindMatchStr()/Match() call

	SREGEX_ERR LastError;	// Error code recorded by the most recent Initialize()
	T_Ptn pErrorPos;		// Pattern position associated with LastError

	// Set by Interrupt(), cleared when a match operation starts.
	// NOTE(review): volatile does not provide inter-thread synchronisation;
	// if Interrupt() is meant to be called from another thread, confirm
	// whether this is sufficient on the target platform.
	volatile bool IsInterrupted;	// Whether the current matching should be abandoned

	const T_Compare CompareChar;	// Function object used to compare characters; its address is given to string and character-class nodes
};

}; // N_SRegex

#endif // SREGEX_H_INCLUDED_

