/*
base program http://musyozoku211.blog118.fc2.com/?q=bpe
bpe.c
*/
using System;
using System.Diagnostics;
using System.Collections.Generic;

namespace Archiver.Bpe{
	/// <summary>
	/// Shared size constants for the byte-pair tables used by the encoder and decoder.
	/// </summary>
	class PairTable{
		/// <summary>
		/// Bytes occupied by one serialized pair-table entry: three payload bytes
		/// followed by one padding byte.
		/// </summary>
		public const int SIGNED_TABLE_UNIT = 3 + 1;
		/// <summary>Number of distinct byte values; the per-byte tables have one slot for each.</summary>
		protected const int TABLE_SIZE = byte.MaxValue + 1;
	}
	/// <summary>
	/// Encoder-side list of substitutions, kept in insertion order. Each entry is a
	/// replacement code byte plus the two bytes of the pair it stands for.
	/// </summary>
	class SignedList {
		// Three parallel queues; entry k is (m_code[k], m_left[k], m_right[k]).
		readonly Queue<byte> m_code = new Queue<byte>();
		readonly Queue<byte> m_left = new Queue<byte>();
		readonly Queue<byte> m_right = new Queue<byte>();

		public SignedList()
		{
		}

		/// <summary>Number of entries currently held.</summary>
		public int Length{
			get {
				int entries = m_code.Count;
				Debug.Assert(entries == m_left.Count);
				Debug.Assert(entries == m_right.Count);
				return entries;
			}
		}

		/// <summary>Appends one substitution.</summary>
		/// <param name="data">Code byte that replaces the pair.</param>
		/// <param name="mainindex">First byte of the replaced pair.</param>
		/// <param name="subindex">Second byte of the replaced pair.</param>
		public void Add(byte data, byte mainindex, byte subindex)
		{
			m_code.Enqueue(data);
			m_left.Enqueue(mainindex);
			m_right.Enqueue(subindex);
		}

		/// <summary>
		/// Serializes the list as a one-byte entry count followed by 4-byte records
		/// (code, first, second, 0). Nothing is written when the list is empty.
		/// The entries are consumed: the list is empty afterwards.
		/// </summary>
		/// <param name="data">Destination the bytes are appended to.</param>
		public void Write(List<byte> data)
		{
			int entries = m_left.Count;
			if(entries == 0){
				return;
			}
			// The count is stored in a single byte.
			Debug.Assert(entries < 0x100);
			data.Add((byte) entries);
			while(m_code.Count > 0){
				data.Add(m_code.Dequeue());
				data.Add(m_left.Dequeue());
				data.Add(m_right.Dequeue());
				data.Add(0); // padding so that each record is 4 bytes
			}
		}
	}
	/// <summary>
	/// Decoder-side substitution table. Every byte value has a "main" and a "sub"
	/// entry; a freshly constructed table maps each value to itself with sub = 0.
	/// </summary>
	class SignedTable : PairTable{
		readonly byte [] m_main = new byte [TABLE_SIZE];
		readonly byte [] m_sub = new byte [TABLE_SIZE];

		public SignedTable()
		{
			// m_sub keeps its default zeros; m_main starts as the identity mapping.
			for(int value = 0; value < TABLE_SIZE; value++){
				m_main[value] = (byte) value;
			}
		}

		/// <summary>
		/// Loads table entries from the input for every block type except Row
		/// (for Row blocks nothing is consumed and the table is left untouched).
		/// The serialized form is a one-byte entry count followed by 4-byte records
		/// (index, main, sub, padding).
		/// </summary>
		/// <param name="input_data">Source queue; the bytes read are removed from it.</param>
		/// <param name="type">Type of the block the table belongs to.</param>
		/// <returns>false when the input ends before the announced entries are complete.</returns>
		public bool Read(Queue<byte> input_data, Block.type type)
		{
			if(type == Block.type.Row){
				return true;
			}
			if(input_data.Count == 0){
				return false;
			}
			int remaining = input_data.Dequeue();
			while(remaining-- > 0){
				if(input_data.Count < SIGNED_TABLE_UNIT){
					return false;
				}
				byte code = input_data.Dequeue();
				m_main[code] = input_data.Dequeue();
				m_sub[code] = input_data.Dequeue();
				input_data.Dequeue(); // skip the padding byte of the 4-byte record
			}
			return true;
		}

		/// <summary>Returns the main entry stored for <paramref name="index"/>.</summary>
		public byte MainGet(int index){
			return m_main[index];
		}

		/// <summary>Returns the sub entry stored for <paramref name="index"/>.</summary>
		public byte SubGet(int index){
			return m_sub[index];
		}
	}
	/// <summary>
	/// Tracks which byte values are already in use inside a block, so the encoder
	/// can pick unused values as replacement codes.
	/// </summary>
	class EncodePair : PairTable{
		// m_table[v] is true once byte value v is known to be in use.
		readonly bool [] m_table;

		public EncodePair()
		{
			// A new bool array is all false: nothing is in use yet.
			m_table = new bool[TABLE_SIZE];
		}

		/// <summary>Marks every byte value that occurs in <paramref name="block"/> as used.</summary>
		public void Load(byte [] block)
		{
			foreach(byte value in block){
				m_table[value] = true;
			}
		}

		/// <summary>
		/// Searches circularly for an unused byte value, starting with the value
		/// after <paramref name="unused"/> (0xff is followed by 0x00).
		/// </summary>
		/// <param name="unused">
		/// In: the value the search starts after. Out: the unused value found.
		/// Left unchanged when the method returns false.
		/// </param>
		/// <returns>true when an unused value was found; false when all values are in use.</returns>
		public bool UnusedCharFind(ref byte unused)
		{
			byte candidate = unused;
			for(int tried = 0; tried < TABLE_SIZE; tried++){
				// The wrap from 0xff to 0x00 is intended; make it explicit so the
				// search also works when the project is compiled with overflow checks.
				candidate = unchecked((byte) (candidate + 1));
				if(!m_table[candidate]){
					unused = candidate;
					return true;
				}
			}
			return false;
		}

		/// <summary>Marks <paramref name="data"/> as used.</summary>
		public void FoundCharSet(byte data)
		{
			m_table[data] = true;
		}
	}
	/// <summary>
	/// Occurrence counters for every ordered pair of byte values.
	/// </summary>
	class PairCount : PairTable{
		// One 16-bit counter per (first, second) pair; a new array starts at zero.
		readonly ushort [] m_counts = new ushort[TABLE_SIZE * TABLE_SIZE];

		public PairCount()
		{
		}

		// Flat slot of the pair: first byte in the high 8 bits, second in the low 8 bits.
		int index_get(byte c0, byte c1)
		{
			int high = c0 << 8;
			return high | c1;
		}

		/// <summary>Adds one to the counter of the pair (c0, c1) and returns the new count.</summary>
		public ushort Increment(byte c0, byte c1)
		{
			int slot = index_get(c0, c1);
			return ++m_counts[slot];
		}

		/// <summary>Resets the counter of the pair (c0, c1) to zero.</summary>
		public void Clear(byte c0, byte c1)
		{
			m_counts[index_get(c0, c1)] = 0;
		}
	}
	/// <summary>
	/// Fixed-size byte buffer that is filled in one go and then read sequentially.
	/// </summary>
	class Buffer{
		readonly byte [] m_bytes;
		readonly int m_capacity;
		// Index of the next byte Shift() will return.
		int m_cursor = 0;

		/// <param name="size">Number of bytes the buffer holds.</param>
		public Buffer(int size)
		{
			m_capacity = size;
			m_bytes = new byte [size];
		}

		/// <summary>
		/// Fills the whole buffer from <paramref name="data"/> starting at
		/// <paramref name="offset"/>, then advances <paramref name="offset"/> past
		/// the copied bytes.
		/// </summary>
		public void Load(byte [] data, ref int offset)
		{
			int start = offset;
			System.Buffer.BlockCopy(data, start, m_bytes, 0, m_capacity);
			offset = start + m_capacity;
		}

		/// <summary>Fills the whole buffer with bytes removed from the front of the queue.</summary>
		public void Load(Queue<byte> data)
		{
			int filled = 0;
			while(filled < m_capacity){
				m_bytes[filled] = data.Dequeue();
				filled++;
			}
		}

		/// <summary>Number of bytes already handed out by Shift().</summary>
		public int Offset{
			get {
				return m_cursor;
			}
		}

		/// <summary>Returns the next byte and advances the read position.</summary>
		public byte Shift(){
			int at = m_cursor++;
			return m_bytes[at];
		}
	}
	/// <summary>
	/// Payload of one block, produced either by byte-pair encoding raw data or by
	/// decoding an encoded block.
	/// </summary>
	class Contents{
		// Result of the last Encode/Decode call; null before the first call.
		byte [] m_data;

		/// <summary>Size in bytes of the current payload.</summary>
		public int Length{
			get{
				return m_data.Length;
			}
		}

		/// <summary>Appends the current payload to <paramref name="block"/>.</summary>
		public void Write(List<byte> block)
		{
			for(int i = 0; i < m_data.Length; i++){
				block.Add(m_data[i]);
			}
		}

		/// <summary>
		/// Byte-pair encodes <paramref name="block"/>. While an unused byte value
		/// exists and some pair occurs more than 3 times, the most frequent pair is
		/// replaced by that unused value and the substitution is recorded.
		/// </summary>
		/// <param name="block">Raw data; it is overwritten in place as working storage.</param>
		/// <param name="pairtable">Used-byte tracker; every code chosen here is marked as used.</param>
		/// <param name="signed_list">Receives one (code, first, second) entry per substitution.</param>
		public void Encode(byte [] block, EncodePair pairtable, SignedList signed_list)
		{
			PairCount counts = new PairCount();
			// Logical length of the data; it shrinks as pairs are replaced.
			int length = block.Length;
			// Starting at 0xff makes the first unused-value search begin at 0x00.
			byte code = 0xff;
			while(pairtable.UnusedCharFind(ref code)){
				// Count every adjacent pair and remember the most frequent one.
				ushort best = 0;
				byte left = 0, right = 0;
				int pos = 0;
				while(pos < length - 1){
					byte first = block[pos];
					byte second = block[pos + 1];
					ushort seen = counts.Increment(first, second);
					if(seen > best){
						left = first;
						right = second;
						best = seen;
					}
					// In a run of three equal bytes, skip the overlapping middle pair.
					if((pos < length - 2) && (first == second) && (second == block[pos + 2])){
						pos++;
					}
					pos++;
				}
				// Stop once the best pair occurs 3 times or fewer.
				if(best <= 3){
					break;
				}
				// Replace the pair (left, right) with the code and, in the same pass,
				// reset the pair counters back to zero.
				counts.Clear(left, right);
				int src = 0, dst = 0;
				while(src < length - 1){
					bool hit = (block[src] == left) && (block[src + 1] == right);
					if(!hit){
						counts.Clear(block[src], block[src + 1]);
						block[dst++] = block[src++];
						continue;
					}
					// The pair starting at the second byte of the match is skipped by
					// this loop, so its counter is reset here. The test uses the
					// array length rather than the logical length; the original
					// author noted that the fallback branch looks like a bug inherited
					// from the base program.
					byte follower = (block.Length - src >= 3) ? block[src + 2] : (byte) 0;
					counts.Clear(block[src + 1], follower);
					block[dst++] = code;
					src += 2;
				}
				// Carry over a trailing byte that was not part of a replaced pair.
				if(src == length - 1){
					block[dst++] = block[src];
				}

				length = dst;

				pairtable.FoundCharSet(code);
				signed_list.Add(code, left, right);
			}
			m_data = new byte[length];
			System.Buffer.BlockCopy(block, 0, m_data, 0, length);
		}

		/// <summary>
		/// Expands an encoded block. Each byte whose main table entry equals itself
		/// is emitted as-is; any other byte is replaced by its main entry, with its
		/// sub entry pushed on a stack to be expanded afterwards.
		/// </summary>
		/// <param name="bufsize">Only used in a debug assertion on the stack depth.</param>
		/// <param name="blocksize">Number of encoded bytes to take from <paramref name="workbuf"/>.</param>
		/// <param name="workbuf">Encoded bytes.</param>
		/// <param name="table">Substitution table of the block.</param>
		/// <returns>false when the expansion stack reaches 0x80 entries; the payload is then left unchanged.</returns>
		public bool Decode(int bufsize, int blocksize, Buffer workbuf, SignedTable table)
		{
			List <byte> rowdata = new List <byte>();
			Stack<byte> pending = new Stack<byte>();
			while((pending.Count > 0) || (workbuf.Offset < blocksize)){
				// Deferred second halves are expanded before new input is taken.
				byte symbol = (pending.Count > 0) ? pending.Pop() : workbuf.Shift();
				byte head = table.MainGet(symbol);
				while(head != symbol){
					pending.Push(table.SubGet(symbol));
					if(pending.Count >= 0x80){
						return false;
					}
					symbol = head;
					head = table.MainGet(symbol);
				}
				Debug.Assert(pending.Count < bufsize);
				rowdata.Add(symbol);
			}
			m_data = rowdata.ToArray();
			return true;
		}
	}
	/// <summary>
	/// Block kinds and layout constants shared by the block encoder and decoder.
	/// </summary>
	class Block{
		/// <summary>
		/// Kind of a stored block. The header flag values used for them are
		/// Row = 0, Compress = 0x8000 and Backlog = 0xc000.
		/// </summary>
		public enum type{
			Row = 0,
			Compress = 1,
			Backlog = 2
		}
		// Size in bytes of the block header.
		protected const int BLOCK_HEADER_SIZE = 2;
		// Size in bytes of the offset stored in a Backlog block.
		protected const int BACKLOG_OFFSET_SIZE = 3;
		// Bits of a stored Backlog offset that hold the offset inside a page.
		protected const int BACKLOG_OFFSET_MASK = 0x1fff;
		// Bits of a stored Backlog offset that hold the page number.
		protected const int BACKLOG_PAGE_MASK = 0xff0000;
		// Distance between the page bits of a stored offset (from bit 16) and
		// their place in an absolute offset (from bit 13).
		protected const int BACKLOG_OFFSET_SHIFT = 16 - 13;
	}
	/// <summary>
	/// One block of an encoded stream: parses the block header, gathers the
	/// block's data and substitution table, and writes the decoded bytes.
	/// </summary>
	class DecodeBlock : Block{
		readonly type m_type;
		readonly int m_blocksize;
		// False when the input ended before a complete block header could be read.
		readonly bool m_header_ok;
		SignedTable m_table;
		Buffer m_workbuf;

		// Removes `length` bytes from the queue and combines them as a little-endian integer.
		int fetch_little_endian(Queue<byte> bytedata, int length)
		{
			int ret = 0;
			Debug.Assert(length == 2 || length == 3);
			for(int i = 0; i < length; i++){
				int shift = i * 8;
				ret |= bytedata.Dequeue() << shift;
			}
			return ret;
		}

		/// <summary>
		/// Reads the 2-byte little-endian block header from <paramref name="data"/>.
		/// The top two bits select the block type (0 = Row, 0x8000 = Compress,
		/// 0xc000 = Backlog; any other value leaves the type at Row) and the low
		/// 14 bits are the block size. If fewer than 2 bytes remain, nothing is
		/// consumed and DataSet will report failure.
		/// </summary>
		public DecodeBlock(Queue<byte> data)
		{
			if(data.Count < BLOCK_HEADER_SIZE){
				// Truncated stream: do not let Dequeue throw; DataSet returns false instead.
				m_header_ok = false;
				return;
			}
			m_header_ok = true;
			int headcode = fetch_little_endian(data, BLOCK_HEADER_SIZE);
			m_blocksize = headcode & ~0xc000;
			switch(headcode & 0xc000){
			case 0:
				m_type = type.Row;
				break;
			case 0x8000:
				m_type = type.Compress;
				break;
			case 0xc000:
				m_type = type.Backlog;
				break;
			}
		}

		/// <summary>
		/// Collects the encoded bytes and the substitution table of this block.
		/// Row and Compress blocks take both from <paramref name="data_queue"/>;
		/// a Backlog block takes a 3-byte offset from the queue and then reads
		/// data and table from <paramref name="data_all"/> at that offset.
		/// </summary>
		/// <param name="data_queue">Remaining input, positioned right after the block header.</param>
		/// <param name="data_all">The complete encoded stream, needed for Backlog references.</param>
		/// <returns>false when the header was truncated, the input is too short, or a Backlog offset points outside <paramref name="data_all"/>.</returns>
		public bool DataSet(Queue<byte> data_queue, byte [] data_all)
		{
			if(!m_header_ok){
				return false;
			}
			m_table = new SignedTable();

			m_workbuf = new Buffer(m_blocksize);
			if(m_type == type.Backlog){
				if(data_queue.Count < BACKLOG_OFFSET_SIZE){
					return false;
				}
				int offset = fetch_little_endian(data_queue, BACKLOG_OFFSET_SIZE);
				// The stored value is in page.offset form; convert it to an absolute offset.
				offset = (offset & BACKLOG_OFFSET_MASK) | ((offset & BACKLOG_PAGE_MASK) >> BACKLOG_OFFSET_SHIFT);
				// The offset comes from the stream, so validate it before copying.
				if(data_all == null || offset > data_all.Length - m_blocksize){
					return false;
				}
				m_workbuf.Load(data_all, ref offset);
				// The table's entry-count byte must exist...
				if(offset >= data_all.Length){
					return false;
				}
				// Count byte plus the entries; offset is deliberately not advanced
				// because the count byte itself is part of what the table reads.
				int pts = data_all[offset] * PairTable.SIGNED_TABLE_UNIT + 1;
				// ...and so must every entry it announces.
				if(pts > data_all.Length - offset){
					return false;
				}
				// NOTE(review): a table is always read here. If the referenced block
				// was stored as Row (which carries no table) these bytes would be
				// whatever follows it in the stream - confirm the encoder never
				// references Row blocks.
				Queue<byte> backdata = new Queue<byte>();
				for(int i = 0; i < pts; i++){
					backdata.Enqueue(data_all[offset + i]);
				}
				if(m_table.Read(backdata, m_type) == false){
					return false;
				}
			}else{
				// Block data first, then (for non-Row blocks) the table.
				if(data_queue.Count < m_blocksize){
					return false;
				}
				m_workbuf.Load(data_queue);
				if(m_table.Read(data_queue, m_type) == false){
					return false;
				}
			}
			return true;
		}

		/// <summary>
		/// Decodes the block gathered by DataSet and appends the result to
		/// <paramref name="out_data"/>.
		/// </summary>
		/// <param name="buffer_size">Passed through to Contents.Decode.</param>
		/// <param name="out_data">Destination for the decoded bytes.</param>
		/// <returns>false when decoding fails; nothing is appended in that case.</returns>
		public bool Write(int buffer_size, List<byte> out_data)
		{
			Contents context = new Contents();
			if(context.Decode(buffer_size, m_blocksize, m_workbuf, m_table) == false){
				return false;
			}
			context.Write(out_data);
			return true;
		}
	}
	/// <summary>
	/// One block of the encoded output: either freshly encoded data or a Backlog
	/// reference to a block that was already written.
	/// </summary>
	class EncodeBlock : Block{
		type m_type;
		int m_blocksize;
		// Everything that follows the 2-byte header of this block.
		readonly List<byte> m_data = new List<byte>();

		/// <summary>
		/// Turns this block into a Backlog reference to the block whose header
		/// starts at <paramref name="bpeoffset"/> in <paramref name="bpedata"/>.
		/// </summary>
		/// <param name="bpedata">Encoded output produced so far.</param>
		/// <param name="bpeoffset">Position of the referenced block's header.</param>
		public void BacklogSet(byte [] bpedata, int bpeoffset)
		{
			// Take over the referenced block's 16-bit little-endian header value
			// (this includes its type flag bits; Write ORs in 0xc000 afterwards).
			int low = bpedata[bpeoffset];
			int high = bpedata[bpeoffset + 1];
			int dataoffset = bpeoffset + 2;
			m_type = type.Backlog;
			m_blocksize = low | (high << 8);
			// NOTE(review): the decoder reads a pair table after the referenced
			// data; a referenced Row block has none - confirm this cannot occur.
			// Split the position of the referenced data:
			//   bit0-12  -> bit0-12, with bit15 set (mdc5 ROM address)
			//   bit13-20 -> bit16-23
			int page = (dataoffset & ~BACKLOG_OFFSET_MASK) << BACKLOG_OFFSET_SHIFT;
			int stored = (dataoffset & BACKLOG_OFFSET_MASK) | 0x8000 | page;
			Container.pack_little_endian(m_data, stored, BACKLOG_OFFSET_SIZE);
		}

		/// <summary>
		/// Appends the block to <paramref name="out_data"/>: a 2-byte header (type
		/// flag combined with the block size) followed by the block body.
		/// </summary>
		public void Write(List<byte> out_data)
		{
			int flag;
			if(m_type == type.Compress){
				flag = 0x8000;
			}else if(m_type == type.Backlog){
				flag = 0xc000;
			}else{
				flag = 0;
			}
			Container.pack_little_endian(out_data, flag | m_blocksize, BLOCK_HEADER_SIZE);
			out_data.AddRange(m_data);
		}

		/// <summary>
		/// Removes <paramref name="size"/> bytes from <paramref name="input_data"/>,
		/// byte-pair encodes them and stores the result followed by the
		/// substitution list. The block becomes Compress when at least one
		/// substitution was made, otherwise Row.
		/// </summary>
		public void Encode(Queue<byte> input_data, int size)
		{
			byte [] row = new byte [size];
			int taken = 0;
			while(taken < size){
				row[taken] = input_data.Dequeue();
				taken++;
			}
			EncodePair pairtable = new EncodePair();
			pairtable.Load(row);
			SignedList list = new SignedList();
			Contents context = new Contents();
			context.Encode(row, pairtable, list);

			// The list must be inspected before it is written: writing empties it.
			m_type = (list.Length != 0) ? type.Compress : type.Row;
			m_blocksize = context.Length;
			context.Write(m_data);
			list.Write(m_data);
		}
	}
	/// <summary>
	/// Whole-stream encoder and decoder. An encoded stream is one buffer-size
	/// byte followed by a sequence of blocks.
	/// </summary>
	class Container {
		readonly int m_buffer_size;
		// Size in bytes of the buffer-size field at the start of a stream.
		const int BUFFER_SIZE_SIZE = 1;

		/// <param name="bufsize">Page size in bytes; input is encoded in blocks of at most this size.</param>
		public Container(int bufsize)
		{
			m_buffer_size = bufsize;
		}

		// Removes `length` bytes from the queue and combines them as a little-endian integer.
		int fetch_little_endian(Queue<byte> bytedata, int length)
		{
			int ret = 0;
			Debug.Assert(length <= 4);
			for(int i = 0; i < length; i++){
				int shift = i * 8;
				ret |= bytedata.Dequeue() << shift;
			}
			return ret;
		}

		/// <summary>
		/// Appends the lowest <paramref name="length"/> bytes of
		/// <paramref name="data"/> to <paramref name="encodedata"/>, least
		/// significant byte first.
		/// </summary>
		public static void pack_little_endian(List<byte> encodedata, int data, int length)
		{
			Debug.Assert(length <= 4);
			for(int i = 0; i < length; i++){
				int shift = i * 8;
				encodedata.Add((byte) (data >> shift & 0xff));
			}
		}

		/// <summary>
		/// Decodes a complete encoded stream.
		/// </summary>
		/// <param name="bpedata">Encoded stream.</param>
		/// <param name="rowdata">Decoded bytes on success, null on failure.</param>
		/// <returns>false when the stream is missing, too short, or any block fails to load or decode.</returns>
		public bool Decode(byte [] bpedata, out byte [] rowdata)
		{
			rowdata = null;
			// An absent or empty stream has no buffer-size byte to skip.
			if(bpedata == null || bpedata.Length < BUFFER_SIZE_SIZE){
				return false;
			}
			Queue<byte> input_data = new Queue<byte>(bpedata);
			for(int i = 0; i < BUFFER_SIZE_SIZE; i++){ // discard the stored buffer size
				input_data.Dequeue();
			}
			List<byte> out_data = new List<byte>();
			while(input_data.Count != 0){
				DecodeBlock block = new DecodeBlock(input_data);
				if(block.DataSet(input_data, bpedata) == false){
					return false;
				}
				// A block that fails to decode must fail the whole stream; otherwise
				// the output would silently miss that block's bytes.
				if(block.Write(m_buffer_size, out_data) == false){
					return false;
				}
			}
			rowdata = out_data.ToArray();
			return true;
		}

		/// <summary>
		/// Encodes <paramref name="rowdata"/> page by page. Inputs of at most
		/// 2 * 0x20000 bytes are first passed through Duplicate, and pages it
		/// reports as non-original are written as Backlog references.
		/// </summary>
		/// <param name="rowdata">Raw input.</param>
		/// <param name="encodedata">Encoded stream on success, null on failure.</param>
		/// <returns>false when duplicate detection fails.</returns>
		public bool Encode(byte [] rowdata, out byte []encodedata)
		{
			DuplicateMap [] map;
			// Page count of the input as given, taken before duplicates are removed.
			int pagecount = rowdata.Length / m_buffer_size;
			// Try duplicate removal for inputs up to the size limit
			// (the original comment calls this limit "2M").
			if(rowdata.Length <= 2 * 0x20000){
				int rowsize = rowdata.Length;
				Duplicate d = new Duplicate(m_buffer_size);
				if(d.Encode(rowdata, out rowdata, out map) == false){
					encodedata = null;
					return false;
				}
				Debug.Assert(map.Length == rowsize / m_buffer_size);
			}else{
				// No duplicate detection: every page is an original at its own offset.
				map = new DuplicateMap[rowdata.Length / m_buffer_size];
				for(int i = 0; i < map.Length; i++){
					map[i] = new DuplicateMap();
					map[i].Offset = i * m_buffer_size;
					map[i].Original = true;
				}
			}
			int pageoffset = 0;
			// Key: offset of a page in rowdata; value: offset of its block in the encoded output.
			Dictionary<int, int> offsetlog = new Dictionary<int, int>();
			Queue<byte> input_data = new Queue<byte>(rowdata);
			List<byte> bpedata = new List<byte>();
			pack_little_endian(bpedata, 0x2000 / m_buffer_size, BUFFER_SIZE_SIZE);
			Debug.Assert((0x2000 % m_buffer_size) == 0);
			while(input_data.Count != 0){
				EncodeBlock tblock = new EncodeBlock();
				{
					int rowoffset = rowdata.Length - input_data.Count;
					offsetlog[rowoffset] = bpedata.Count;
				}
				// The map has one entry per complete page. A trailing partial page
				// has no entry and can only be encoded from the remaining input.
				bool original = pageoffset >= map.Length || map[pageoffset].Original;
				if(original == false){
					int backlog = map[pageoffset].Offset;
					tblock.BacklogSet(bpedata.ToArray(), offsetlog[backlog]);
				}else{
					int blocksize = input_data.Count < m_buffer_size ? input_data.Count : m_buffer_size;
					tblock.Encode(input_data, blocksize);
				}
				tblock.Write(bpedata);
				pageoffset += 1;
			}
			// The de-duplicated input is exhausted but map entries remain:
			// these must all be references to earlier pages.
			for(int i = pageoffset; i < pagecount; i++){
				int backlog = map[i].Offset;
				Debug.Assert(map[i].Original == false);
				EncodeBlock tblock = new EncodeBlock();
				tblock.BacklogSet(bpedata.ToArray(), offsetlog[backlog]);
				tblock.Write(bpedata);
			}
			encodedata = bpedata.ToArray();
			return true;
		}
	}
}
