// Copyright (C) 2017  Mocchi (mocchi_2003@yahoo.co.jp)
// License: Boost Software License   See LICENSE.txt for the full license.

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Windows.Forms;

namespace pdf_pp {
	/// <summary>
	/// Common base of every parsed PDF syntax element in this file
	/// (dictionary, array, plain value, indirect reference).
	/// </summary>
	public abstract class Item {
		/// <summary>Renders this element as PDF source text.</summary>
		/// <returns>The textual form of the element.</returns>
		public abstract string Serialize();
	}
	/// <summary>
	/// PDF dictionary (<c>&lt;&lt; ... &gt;&gt;</c>) stored as an ordered array of key/value pairs.
	/// A freshly constructed instance has <see cref="dict"/> set to null; every member
	/// treats that as an empty dictionary instead of throwing.
	/// </summary>
	public class Dict : Item {
		/// <summary>Backing storage; may be null (treated as empty).</summary>
		public KeyValuePair<string, Item>[] dict = null;

		// Returns the backing array, or an empty array when none has been assigned yet.
		private KeyValuePair<string, Item>[] Entries() {
			if (dict == null) return new KeyValuePair<string, Item>[0];
			return dict;
		}

		/// <summary>Looks up the first entry whose key equals <paramref name="name"/>.</summary>
		/// <param name="name">Key to look for (e.g. "/Type").</param>
		/// <returns>The stored item, or null when the key is absent.</returns>
		public Item GetValue(string name) {
			return Entries().Where(kv => kv.Key == name).Select(kv => kv.Value).FirstOrDefault();
		}

		/// <summary>Serializes all entries as "key value" pairs between "&lt;&lt;" and "&gt;&gt;".</summary>
		/// <returns>The dictionary text, terminated by a newline.</returns>
		public override string Serialize() {
			// Entries with a null key are skipped; they can still appear if a caller
			// assigned the public array directly.
			string[] pairs = Entries()
				.Where(kv => kv.Key != null)
				.Select(kv => kv.Key + " " + kv.Value.Serialize())
				.ToArray();
			return "<<\n" + string.Join(" ", pairs) + "\n>>\n";
		}

		/// <summary>Replaces the value of an existing key, or appends a new entry.</summary>
		/// <param name="key">Key to set.</param>
		/// <param name="value">Item to store under the key.</param>
		public void SetValue(string key, Item value) {
			var new_kv = new KeyValuePair<string, Item>(key, value);
			if (dict != null) {
				for (int i = 0; i < dict.Length; ++i) {
					if (dict[i].Key != key) continue;
					dict[i] = new_kv;
					return;
				}
			}
			dict = Entries().Concat(new KeyValuePair<string, Item>[] { new_kv }).ToArray();
		}

		/// <summary>Removes every entry stored under <paramref name="key"/>.</summary>
		/// <param name="key">Key to remove.</param>
		public void RemoveKV(string key) {
			if (dict == null) return;
			// Compact the array rather than leaving (null, null) placeholders behind.
			dict = dict.Where(kv => kv.Key != key).ToArray();
		}

		/// <summary>Creates a new dictionary with its own entry array that shares the contained items.</summary>
		/// <returns>The copy.</returns>
		public Dict ShallowCopy() {
			KeyValuePair<string, Item>[] src = Entries();
			Dict new_dict = new Dict();
			new_dict.dict = new KeyValuePair<string, Item>[src.Length];
			src.CopyTo(new_dict.dict, 0);
			return new_dict;
		}
	}
	/// <summary>PDF array (<c>[ ... ]</c>) holding its elements in order.</summary>
	public class Ary : Item {
		/// <summary>Elements of the array.</summary>
		public Item[] ary = null;

		/// <summary>Serializes the elements separated by single spaces inside "[ " and " ]".</summary>
		/// <returns>The array text.</returns>
		public override string Serialize() {
			string[] parts = ary.Select(element => element.Serialize()).ToArray();
			string body = string.Join(" ", parts);
			return "[ " + body + " ]";
		}
	}
	/// <summary>A single token kept verbatim (name, number, string, keyword, ...).</summary>
	public class Value : Item {
		/// <summary>The raw token text.</summary>
		public string val;

		/// <summary>Wraps a raw token.</summary>
		/// <param name="val">Token text to store unchanged.</param>
		public Value(string val) {
			this.val = val;
		}

		/// <summary>Returns the stored token text unchanged.</summary>
		public override string Serialize() {
			return this.val;
		}
	}
	/// <summary>
	/// Indirect reference. The parser in this file fills <see cref="reference"/>
	/// with three tokens: the two preceding values followed by "R".
	/// </summary>
	public class Ref : Item {
		/// <summary>Tokens making up the reference.</summary>
		public string[] reference = null;

		/// <summary>Joins the reference tokens with single spaces.</summary>
		public override string Serialize() {
			string[] tokens = reference;
			return string.Join(" ", tokens);
		}
	}
	/// <summary>
	/// One object (or the trailer) parsed from a flat token list into an <see cref="Item"/> tree.
	/// </summary>
	public class Obj {
		/// <summary>Leading tokens identifying the object: 1 token for "trailer", otherwise up to 3.</summary>
		public string[] objname;
		/// <summary>Parsed body; stays null when the tokens end before the first item is complete.</summary>
		public Item item;
		/// <summary>Stream position value supplied by the caller.</summary>
		public long streampos;

		/// <summary>
		/// Parses the first complete item that follows the object header tokens.
		/// Tokens after that item are ignored.
		/// </summary>
		/// <param name="obj_tokens">Tokens of the object, header first.</param>
		/// <param name="streampos">Stored as-is in <see cref="streampos"/>.</param>
		/// <exception cref="Exception">Too few tokens, malformed reference or dictionary.</exception>
		/// <exception cref="InvalidOperationException">A closing bracket has no matching opening bracket.</exception>
		public Obj(string[] obj_tokens, long streampos) {
			this.streampos = streampos;
			if (obj_tokens.Length < 2) throw new Exception("too short tokens for parsing object.");
			int idx = (obj_tokens[0] == "trailer") ? 1 : 3;
			objname = obj_tokens.Take(idx).ToArray();
			// pos_bracket remembers the stack height at each still-open "<<" or "[".
			List<int> pos_bracket = new List<int>();
			List<Item> stack = new List<Item>();
			for (int j = idx; j < obj_tokens.Length; ++j) {
				string cur_tok = obj_tokens[j];
				if (cur_tok == "R") {
					// "a b R": fold the two preceding plain values into one reference.
					if (stack.Count < 2) throw new Exception("lack of preceeding tokens of reference.");
					Ref refer = new Ref();
					int ref_start = stack.Count - 2;
					Value[] ref_itm = stack.Skip(ref_start).Take(2).Select(s => s as Value).ToArray();
					if (ref_itm[0] == null || ref_itm[1] == null) throw new Exception("reference type mismatch.");
					refer.reference = ref_itm.Select(ri => ri.val).Concat(new[] { "R" }).ToArray();
					stack.RemoveRange(ref_start, 2);
					stack.Add(refer);
				} else if (cur_tok == "<<") {
					pos_bracket.Add(stack.Count);
				} else if (cur_tok == ">>") {
					var cur_dict = new List<KeyValuePair<string, Item>>();
					int start_pos = PopBracket(pos_bracket, cur_tok);
					// Everything from start_pos to the top of the stack belongs to this dictionary.
					int cnt = stack.Count - start_pos;
					if ((cnt & 1) == 1) throw new Exception("the count of dictionary item is odd.");
					for (int i = start_pos; i < stack.Count; i += 2) {
						// The first element of each pair is the key and must be a plain value.
						Value name = stack[i] as Value;
						if (name == null) throw new Exception("type mismatch on name cast");
						cur_dict.Add(new KeyValuePair<string, Item>(name.val, stack[i + 1]));
					}
					stack.RemoveRange(start_pos, cnt);
					var dict = new Dict();
					dict.dict = cur_dict.ToArray();
					stack.Add(dict);
				} else if (cur_tok == "[") {
					pos_bracket.Add(stack.Count);
				} else if (cur_tok == "]") {
					Ary ary = new Ary();
					int start_pos = PopBracket(pos_bracket, cur_tok);
					int cnt = stack.Count - start_pos;
					ary.ary = stack.Skip(start_pos).Take(cnt).ToArray();
					stack.RemoveRange(start_pos, cnt);
					stack.Add(ary);
				} else stack.Add(new Value(cur_tok));
				// No bracket open any more: the first top-level item is complete.
				if (pos_bracket.Count == 0) {
					if (stack.Count != 1) throw new Exception(string.Format("parse failed in {0}", string.Join(" ", objname)));
					item = stack[0];
					break;
				}
			}
		}

		// Pops the stack height saved by the innermost open bracket. A closing token with no
		// open bracket previously surfaced as an opaque "Sequence contains no elements";
		// the exception type is kept but the message now names the problem.
		private static int PopBracket(List<int> pos_bracket, string closing_token) {
			if (pos_bracket.Count == 0) {
				throw new InvalidOperationException(string.Format("unbalanced closing token \"{0}\".", closing_token));
			}
			int start_pos = pos_bracket[pos_bracket.Count - 1];
			pos_bracket.RemoveAt(pos_bracket.Count - 1);
			return start_pos;
		}
	}
	public class Program {
		#region Simple text input/output helpers
		/// <summary>
		/// Reads from the current position up to <paramref name="filesize"/> and returns the
		/// data as ASCII text split into lines ("\r\n", "\r" and "\n" all count as line ends).
		/// </summary>
		/// <param name="fs">Stream positioned where reading should start.</param>
		/// <param name="filesize">Total size of the file in bytes.</param>
		/// <returns>The lines, without their line terminators.</returns>
		static string[] GetLinesToEnd(FileStream fs, long filesize) {
			long remaining = filesize - fs.Position;
			byte[] buf = new byte[remaining > 0 ? remaining : 0];
			// Read may return fewer bytes than requested, so loop until the buffer is
			// full or the stream ends, and decode only what was actually read.
			int filled = 0;
			while (filled < buf.Length) {
				int n = fs.Read(buf, filled, buf.Length - filled);
				if (n <= 0) break;
				filled += n;
			}
			string text = Encoding.ASCII.GetString(buf, 0, filled);
			return text.Replace("\r\n", "\n").Replace("\r", "\n").Split('\n');
		}
		/// <summary>
		/// Reads one line of ASCII text. A line ends at "\r\n", a lone "\r", a lone "\n",
		/// or the end of the stream; the terminator is consumed but not returned.
		/// </summary>
		/// <param name="fs">Stream to read from.</param>
		/// <returns>The line content (empty at end of stream).</returns>
		static string GetLine(FileStream fs) {
			var bytes = new List<byte>();
			for (;;) {
				int b = fs.ReadByte();
				if (b == '\r') {
					int bn = fs.ReadByte();
					// Push back the byte after a lone '\r'. At end of stream nothing was
					// consumed, so seeking back would land on the '\r' again.
					if (bn >= 0 && bn != '\n') fs.Seek(-1, SeekOrigin.Current);
					break;
				} else if (b == '\n' || b < 0) break;
				bytes.Add((byte)b);
			}
			return Encoding.ASCII.GetString(bytes.ToArray());
		}
		/// <summary>Writes <paramref name="str"/> at the current stream position, encoded as ASCII.</summary>
		/// <param name="fs">Destination stream.</param>
		/// <param name="str">Text to write.</param>
		static void Write(FileStream fs, string str) {
			byte[] encoded = Encoding.ASCII.GetBytes(str);
			int count = encoded.Length;
			fs.Write(encoded, 0, count);
		}
		#endregion
		/// <summary>
		/// Splits a single line into tokens at NUL, tab, form feed and space characters,
		/// dropping empty tokens.
		/// </summary>
		/// <param name="line">Line to split (must not contain line terminators to be split on).</param>
		/// <returns>The non-empty tokens in order.</returns>
		public static string[] GetTokensFromOneLine(string line) {
			char[] separators = { '\0', '\t', (char)12, ' ' };
			return line.Split(separators, StringSplitOptions.RemoveEmptyEntries);
		}
		/// <summary>
		/// Reads the next token from the stream, skipping leading white space.
		/// Recognized forms: "[" and "]", "&lt;&lt;" and "&gt;&gt;", a parenthesized literal
		/// string (returned whole, including nested and backslash-escaped parentheses),
		/// a "&lt;...&gt;" string, a lone "&gt;", and regular tokens that end at white space
		/// or at one of the characters / [ ] ( &lt; &gt;.
		/// </summary>
		/// <param name="fs">Stream to read from; left positioned just after the token.</param>
		/// <returns>The token text, or an empty string at end of stream.</returns>
		public static string GetToken(FileStream fs) {
			char[] whitespace_chars = { '\0', '\t', '\r', '\n', (char)12, ' ' };
			int c;
			char cc;
			// Skip white space in front of the token.
			do {
				c = fs.ReadByte(); cc = (char)c;
			} while (c >= 0 && whitespace_chars.Any(t => t == cc));
			if (c < 0) return "";

			// Tokens that start with a delimiter character get dedicated handling.
			if (cc == '[' || cc == ']') {
				return new string(cc, 1);
			}
			if (cc == '(') {
				var literal = new StringBuilder();
				literal.Append(cc);
				int parentheses_cnt = 1;
				// Read up to the matching close parenthesis and return the whole range.
				while (parentheses_cnt > 0) {
					c = fs.ReadByte();
					if (c < 0) break;
					cc = (char)c;
					literal.Append(cc);
					if (cc == '\\') {
						// The byte after a backslash is escaped: copy it verbatim so that
						// "\(" and "\)" do not change the nesting depth.
						c = fs.ReadByte();
						if (c < 0) break;
						literal.Append((char)c);
					} else if (cc == '(') ++parentheses_cnt;
					else if (cc == ')') --parentheses_cnt;
				}
				return literal.ToString();
			}
			if (cc == '<') {
				c = fs.ReadByte();
				if (c < 0) return "<"; // nothing follows; do not append an EOF marker character
				cc = (char)c;
				if (cc == '<') return "<<";
				var hex = new StringBuilder("<");
				hex.Append(cc);
				while (cc != '>') {
					c = fs.ReadByte();
					if (c < 0) break;
					cc = (char)c;
					hex.Append(cc);
				}
				return hex.ToString();
			}
			if (cc == '>') {
				c = fs.ReadByte();
				if (c == '>') return ">>";
				// Lone '>': give the look-ahead byte back and return the '>' itself
				// (not the byte that happened to follow it).
				if (c >= 0) fs.Seek(-1, SeekOrigin.Current);
				return ">";
			}

			// Regular token: runs until white space or a delimiter character.
			var tok = new StringBuilder();
			tok.Append(cc);
			char[] token_end_chars = { '/', '[', ']', '(', '<', '>' };
			for (;;) {
				c = fs.ReadByte(); cc = (char)c;
				if (c >= 0 && whitespace_chars.All(t => t != cc) && token_end_chars.All(t => t != cc)) tok.Append(cc); else break;
			}
			// The terminating byte belongs to the next token.
			if (c >= 0) fs.Seek(-1, SeekOrigin.Current);
			return tok.ToString();
		}
		/// <summary>
		/// Collects tokens from the current position until "endobj", "startxref" or an
		/// empty token is seen, and builds an <see cref="Obj"/> from them.
		/// </summary>
		/// <param name="fs">Stream positioned at the start of the object (or trailer).</param>
		/// <returns>The parsed object; its stream position is -1 when no "stream" token was seen.</returns>
		public static Obj GetObject(FileStream fs) {
			long stream_offset = -1;
			var tokens = new List<string>();
			for (;;) {
				string token = GetToken(fs);
				if (token == "stream") {
					// Consume the rest of the "stream" line; the position after it is recorded.
					GetLine(fs);
					stream_offset = fs.Position;
				} else if (token == "endobj" || token == "startxref") {
					break;
				}
				tokens.Add(token);
				// An empty token is still recorded, then ends the collection.
				if (string.IsNullOrEmpty(token)) break;
			}
			return new Obj(tokens.ToArray(), stream_offset);
		}
		/// <summary>
		/// Cross-reference table merged over the whole "/Prev" chain.
		/// <see cref="xref_lines"/> is indexed by object number and holds the raw entry
		/// line; entries never listed in any section are null.
		/// </summary>
		public class Xref {
			public List<string> xref_lines = null;

			/// <summary>
			/// Reads the xref section at the current position and every older section
			/// reachable through "/Prev". Sections are read newest first, so an entry
			/// that is already filled is never overwritten by an older one.
			/// </summary>
			/// <param name="fs">Stream positioned at the first xref section.</param>
			/// <param name="trailer">Receives the trailer of the first section read, or null if none had a dictionary.</param>
			/// <exception cref="Exception">Cross-reference stream found, or malformed subsection header.</exception>
			public Xref(FileStream fs, out Obj trailer) {
				xref_lines = new List<string>();
				trailer = null;
				// Offsets already visited, so a cyclic /Prev chain cannot loop forever.
				var visited_sections = new HashSet<long>();
				visited_sections.Add(fs.Position);
				for (;;) {
					string head_xref = GetLine(fs); // expected to be "xref", or "* * obj" for an xref stream
					if (head_xref.TrimEnd().EndsWith("obj")) {
						throw new Exception("xref stream is not supported");
					}
					for (;;) {
						long pos = fs.Position;
						string head = GetLine(fs);
						// Accept trailing white space and "trailer<<..." on the same line.
						if (head.TrimStart().StartsWith("trailer", StringComparison.Ordinal)) {
							fs.Seek(pos, SeekOrigin.Begin);
							break;
						}
						string[] r = GetTokensFromOneLine(head);
						// Also reached at end of file, where GetLine returns an empty line.
						if (r.Length < 2) throw new Exception("invalid xref subsection header.");
						int obj_start = int.Parse(r[0]);
						int cnt = int.Parse(r[1]);
						// Pad with nulls up to the first object number of this subsection.
						while (xref_lines.Count < obj_start) xref_lines.Add(null);
						for (int i = obj_start, ii = 0; ii < cnt; ++i, ++ii) {
							string xref_line = GetLine(fs);
							if (i < xref_lines.Count) {
								if (!string.IsNullOrEmpty(xref_lines[i])) continue;
								else xref_lines[i] = xref_line;
							} else xref_lines.Add(xref_line);
						}
					}
					Obj cur_trailer = GetObject(fs);
					Dict dict_trailer = cur_trailer.item as Dict;
					if (dict_trailer == null) return;
					if (trailer == null) trailer = cur_trailer;
					Value prev = dict_trailer.GetValue("/Prev") as Value;
					if (prev == null) break;
					long prev_pos = long.Parse(prev.val);
					if (!visited_sections.Add(prev_pos)) break; // already read: stop following the chain
					fs.Seek(prev_pos, SeekOrigin.Begin);
				}
			}

			/// <summary>Seeks to the offset recorded for object number <paramref name="obj_id"/>.</summary>
			/// <param name="fs">Stream to reposition.</param>
			/// <param name="obj_id">Object number.</param>
			/// <returns>
			/// True when the stream was repositioned; false when the entry is missing,
			/// malformed or marked free ("f").
			/// </returns>
			/// <exception cref="Exception">The object number is outside the table.</exception>
			public bool JumpToObject(FileStream fs, int obj_id) {
				int line_idx = obj_id;
				if (line_idx < 0 || line_idx >= xref_lines.Count) throw new Exception(string.Format("object id:{0} is out of range", obj_id));
				string line = xref_lines[line_idx];
				if (string.IsNullOrEmpty(line)) return false; // padded slot, never defined
				string[] tokens = GetTokensFromOneLine(line);
				if (tokens.Length != 3) return false;
				if (tokens[2] == "f") return false; // free entry: the first field is not a file offset
				fs.Seek(long.Parse(tokens[0]), SeekOrigin.Begin);
				return true;
			}

			/// <summary>Seeks to the object named by a reference token triple ("num gen R").</summary>
			/// <param name="fs">Stream to reposition.</param>
			/// <param name="obj_ref">Reference tokens.</param>
			/// <returns>True when the stream was repositioned.</returns>
			public bool JumpToObject(FileStream fs, string[] obj_ref) {
				if (obj_ref == null || obj_ref.Length != 3 || obj_ref[2] != "R") return false;
				return JumpToObject(fs, int.Parse(obj_ref[0]));
			}
		}
		/// <summary>
		/// Locates the basic structure of a PDF file: file size, "startxref" offset,
		/// cross-reference table, trailer and the object referenced by "/Root".
		/// </summary>
		/// <param name="fs">Readable, seekable stream of the file.</param>
		/// <param name="filesize">Receives the file size in bytes.</param>
		/// <param name="startxref">Receives the offset that follows the last "startxref" line.</param>
		/// <param name="xref">Receives the parsed cross-reference table.</param>
		/// <param name="trailer">Receives the trailer object.</param>
		/// <param name="root_obj">Receives the object referenced by "/Root".</param>
		/// <returns>False when the "%PDF" header or a usable "startxref" is missing; true otherwise.</returns>
		/// <exception cref="Exception">Trailer or "/Root" is missing or invalid.</exception>
		public static bool FindPDFStructure(FileStream fs, out long filesize, out long startxref, out Xref xref, out Obj trailer, out Obj root_obj) {
			filesize = 0;
			startxref = 0;
			xref = null;
			trailer = null;
			root_obj = null;

			// Check the "%PDF" signature at the start of the file.
			fs.Seek(0, SeekOrigin.Begin);
			byte[] buf = new byte[4];
			int got = 0;
			while (got < buf.Length) {
				int n = fs.Read(buf, got, buf.Length - got);
				if (n <= 0) break;
				got += n;
			}
			if (got != buf.Length || !Encoding.ASCII.GetBytes("%PDF").SequenceEqual(buf)) return false;

			// Look for the last "startxref" in the tail of the file. A fixed window
			// (capped at the file size) is scanned instead of a size guessed from the
			// digit count, so CRLF line ends or trailing bytes after "%%EOF" do not
			// break detection and the seek cannot go before the start of the file.
			fs.Seek(0, SeekOrigin.End);
			filesize = fs.Position;
			long bufsize = Math.Min(filesize, 1024);
			fs.Seek(-bufsize, SeekOrigin.End);
			string[] tail_lines = GetLinesToEnd(fs, filesize)
				.Select(l => l.Trim())
				.Where(l => l.Length > 0)
				.ToArray();
			int idx_startxref = Array.LastIndexOf(tail_lines, "startxref");
			if (idx_startxref < 0 || idx_startxref + 1 >= tail_lines.Length) return false;
			if (!long.TryParse(tail_lines[idx_startxref + 1], out startxref)) return false;

			// Read the cross-reference table and trailer.
			fs.Seek(startxref, SeekOrigin.Begin);
			xref = new Xref(fs, out trailer);

			Dict dict_trailer = (trailer == null) ? null : trailer.item as Dict;
			if (dict_trailer == null) throw new Exception("invalid trailer.");

			// Fetch the object referenced by "/Root".
			Ref ref_root = dict_trailer.GetValue("/Root") as Ref;
			if (ref_root == null) throw new Exception("/Root is not found or invalid in trailer.");
			if (!xref.JumpToObject(fs, ref_root.reference)) throw new Exception("/Root object is not found in xref.");
			root_obj = GetObject(fs);
			return true;
		}
		/// <summary>
		/// Walks the page tree below <paramref name="root_pages_ref"/> depth-first and
		/// returns the references of all "/Page" nodes in "/Kids" order.
		/// </summary>
		/// <param name="fs">Stream of the PDF file.</param>
		/// <param name="root_pages_ref">Reference to the root "/Pages" node.</param>
		/// <param name="xref">Cross-reference table used to locate objects.</param>
		/// <returns>References of the page objects.</returns>
		/// <exception cref="Exception">A node is not a dictionary, lacks "/Kids" or "/Type", or has an unexpected type.</exception>
		public static Ref[] TraversePages(FileStream fs, Ref root_pages_ref, Xref xref) {
			xref.JumpToObject(fs, root_pages_ref.reference);
			Obj tree_root = GetObject(fs);

			var found = new List<Ref>();

			// Explicit stack of (node, index of the next kid to visit) instead of recursion.
			var pending = new[] { new { obj = tree_root, idx = 0 } }.ToList();
			while (pending.Count != 0) {
				int top = pending.Count - 1;
				var frame = pending[top];
				pending.RemoveAt(top);

				string node_name = string.Join(" ", frame.obj.objname);
				Dict node_dict = frame.obj.item as Dict;
				if (node_dict == null) throw new Exception(string.Format("pages \"{0}\" is not dictionary.", node_name));
				Ary kids = node_dict.GetValue("/Kids") as Ary;
				if (kids == null) throw new Exception(string.Format("/Kids is not found or invalid in pages {0}.", node_name));

				int i = frame.idx;
				while (i < kids.ary.Length) {
					Ref kid_ref = kids.ary[i] as Ref;
					if (kid_ref == null) throw new Exception(string.Format("item of /Kids is not reference in pages {0}.", node_name));
					xref.JumpToObject(fs, kid_ref.reference);
					Obj kid = GetObject(fs);
					Dict kid_dict = kid.item as Dict;
					if (kid_dict == null) {
						throw new Exception(
							string.Format("reference {0} of /Kids in {1} is not dict.", kid_ref.Serialize(), node_name)
						);
					}
					Value kid_type = kid_dict.GetValue("/Type") as Value;
					if (kid_type == null) throw new Exception(string.Format("/Type is not found or invalid in {0}", string.Join(" ", kid.objname)));

					if (kid_type.val == "/Pages") {
						// Resume this node after the nested one: push it back first so the
						// nested node ends up on top of the stack.
						pending.Add(new { obj = frame.obj, idx = i + 1 });
						pending.Add(new { obj = kid, idx = 0 });
						break;
					}
					if (kid_type.val != "/Page") {
						throw new Exception(string.Format("type {0} is found in kids of pages in {1}.", kid_type.val, string.Join(" ", kid.objname)));
					}
					found.Add(kid_ref);
					++i;
				}
			}
			return found.ToArray();
		}

		#region Page splitting
		/// <summary>
		/// Builds the serialized page dictionaries that tile one page into
		/// <paramref name="xdiv"/> x <paramref name="ydiv"/> parts. Each tile is a copy of
		/// the page dictionary with "/MediaBox" replaced by the tile rectangle and every
		/// other defined box clipped to it. Tiles are emitted top row first, left to right.
		/// </summary>
		/// <param name="fs">Stream of the PDF file.</param>
		/// <param name="xref">Cross-reference table used to locate objects.</param>
		/// <param name="page_ref">Reference to the page to split.</param>
		/// <param name="xdiv">Number of columns.</param>
		/// <param name="ydiv">Number of rows.</param>
		/// <param name="margin">Overlap added to each tile, as a fraction of the tile size.</param>
		/// <returns>One serialized dictionary per tile.</returns>
		/// <exception cref="Exception">The page is not a dictionary, "/MediaBox" is missing, or a box is not numeric.</exception>
		public static string[] CreateSplittedPages(FileStream fs, Xref xref, Ref page_ref, int xdiv, int ydiv, double margin) {
			// Move to the page object and parse it.
			xref.JumpToObject(fs, page_ref.reference);
			Obj page = GetObject(fs);
			string pagename = string.Join(" ", page.objname);
			Dict dict_page = page.item as Dict;
			if (dict_page == null) throw new Exception(string.Format("page {0} is not dictionary.", pagename));

			string[] all_boxnames = new string[] { "/MediaBox", "/CropBox", "/BleedBox", "/TrimBox", "/ArtBox" };
			Dictionary<string, Ary> boxes = new Dictionary<string, Ary>();
			// Collect each box; boxes not defined on the page node are looked up on its ancestors.
			var visited_parents = new HashSet<string>();
			Dict dict_ppage = dict_page;
			while (dict_ppage != null) {
				string[] boxes_undef = all_boxnames.Except(boxes.Keys).ToArray();
				if (boxes_undef.Length == 0) break;
				foreach (var boxname in boxes_undef) {
					Ary box = dict_ppage.GetValue(boxname) as Ary;
					if (box != null) boxes.Add(boxname, box);
				}
				Ref parent = dict_ppage.GetValue("/Parent") as Ref;
				if (parent == null) break;
				// A cyclic /Parent chain would otherwise never terminate.
				if (!visited_parents.Add(parent.Serialize())) break;
				xref.JumpToObject(fs, parent.reference);
				dict_ppage = GetObject(fs).item as Dict;
			}
			if (!boxes.ContainsKey("/MediaBox")) throw new Exception(string.Format("/MediaBox not found from page {0} and the ancestor.", pagename));

			double[] mbox = ParseBoxNumbers(boxes["/MediaBox"]);
			if (mbox == null || mbox.Length < 4) throw new Exception(string.Format("/MediaBox value is not numeric in page {0}", pagename));

			// Parse the remaining boxes once, reporting the box that is actually malformed.
			var other_boxes = new List<KeyValuePair<string, double[]>>();
			foreach (var box in boxes) {
				if (box.Key == "/MediaBox") continue;
				if (box.Value == null || box.Value.ary == null || box.Value.ary.Length != 4) continue;
				double[] nums = ParseBoxNumbers(box.Value);
				if (nums == null) throw new Exception(string.Format("{0} value is not numeric in page {1}", box.Key, pagename));
				other_boxes.Add(new KeyValuePair<string, double[]>(box.Key, nums));
			}

			List<string> objs = new List<string>();
			double w = mbox[2] - mbox[0], h = mbox[3] - mbox[1];
			double dw = w / (double)xdiv, dh = h / (double)ydiv;
			double mw = dw * margin, mh = dh * margin;
			for (int j = ydiv - 1; j >= 0; --j) {
				// Tile coordinates start at the MediaBox origin, which need not be (0, 0).
				double yl = mbox[1] + (double)j * dh;
				double yh = yl + dh;
				// Edge tiles take the whole margin on their inner side, inner tiles half
				// on each side, so all tiles end up the same size.
				if (j == 0) yh += mh;
				else if (j == ydiv - 1) yl -= mh;
				else {
					yl -= mh * 0.5; yh += mh * 0.5;
				}
				for (int i = 0; i < xdiv; ++i) {
					double xl = mbox[0] + (double)i * dw;
					double xh = xl + dw;
					if (i == 0) xh += mw;
					else if (i == xdiv - 1) xl -= mw;
					else {
						xl -= mw * 0.5; xh += mw * 0.5;
					}
					Dict new_dict_page = dict_page.ShallowCopy();
					new_dict_page.SetValue("/MediaBox", MakeBox(xl, yl, xh, yh));
					// Clip every other defined box to the newly created tile.
					foreach (var other in other_boxes) {
						double[] obox = (double[])other.Value.Clone();
						if (obox[0] < xl) obox[0] = xl;
						if (obox[1] < yl) obox[1] = yl;
						if (obox[2] > xh) obox[2] = xh;
						if (obox[3] > yh) obox[3] = yh;
						new_dict_page.SetValue(other.Key, MakeBox(obox[0], obox[1], obox[2], obox[3]));
					}
					objs.Add(new_dict_page.Serialize());
				}
			}
			return objs.ToArray();
		}

		// Parses all elements of a box array as culture-invariant numbers; returns null
		// when the array is missing or any element is not a numeric value.
		private static double[] ParseBoxNumbers(Ary box) {
			if (box == null || box.ary == null) return null;
			double[] nums = new double[box.ary.Length];
			for (int i = 0; i < nums.Length; ++i) {
				Value v = box.ary[i] as Value;
				if (v == null) return null;
				if (!double.TryParse(v.val, System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture, out nums[i])) return null;
			}
			return nums;
		}

		// Builds a four-element box array. Numbers are written with '.' as the decimal
		// separator and without exponent notation, regardless of the current culture.
		private static Ary MakeBox(double xl, double yl, double xh, double yh) {
			var inv = System.Globalization.CultureInfo.InvariantCulture;
			Ary box = new Ary();
			box.ary = new Item[] {
				new Value(xl.ToString("0.#####", inv)),
				new Value(yl.ToString("0.#####", inv)),
				new Value(xh.ToString("0.#####", inv)),
				new Value(yh.ToString("0.#####", inv))
			};
			return box;
		}
		/// <summary>
		/// Copies a PDF file and appends an update to the copy in which the selected
		/// pages are replaced by their tiles (see <c>CreateSplittedPages</c>): new page
		/// objects, a rebuilt root "/Pages" object, a full xref table and a new trailer.
		/// On failure the partially written output file is removed.
		/// </summary>
		/// <param name="cur_pdffilepath">Path of the source PDF.</param>
		/// <param name="new_pdffilepath">Path of the PDF to create (overwritten if present); must differ from the source.</param>
		/// <param name="pages">Zero-based page indices to split, or null for all pages.</param>
		/// <param name="xdiv">Number of columns per page.</param>
		/// <param name="ydiv">Number of rows per page.</param>
		/// <param name="margin">Overlap added to each tile, as a fraction of the tile size.</param>
		/// <exception cref="ArgumentException">Source and destination are the same file.</exception>
		/// <exception cref="ArgumentOutOfRangeException">A page index is outside the document.</exception>
		/// <exception cref="Exception">The PDF structure could not be read.</exception>
		public static void CreateSplittedPDF(string cur_pdffilepath, string new_pdffilepath, int[] pages, int xdiv, int ydiv, double margin) {
			// The cleanup below deletes the output file, so it must never be the input.
			if (string.Equals(Path.GetFullPath(cur_pdffilepath), Path.GetFullPath(new_pdffilepath), StringComparison.OrdinalIgnoreCase)) {
				throw new ArgumentException("output path must differ from input path.", "new_pdffilepath");
			}
			bool existed_before = File.Exists(new_pdffilepath);
			bool copied = false;
			try {
				File.Copy(cur_pdffilepath, new_pdffilepath, true);
				copied = true;
				using (FileStream fs = new FileStream(new_pdffilepath, FileMode.Open, FileAccess.ReadWrite)) {
					long filesize, startxref;
					Obj trailer, root_obj;
					Xref xref;
					if (!FindPDFStructure(fs, out filesize, out startxref, out xref, out trailer, out root_obj)) {
						throw new Exception("failed to find PDF structure.");
					}

					// Locate the root Pages node.
					Ref root_pages_ref = ((Dict)root_obj.item).GetValue("/Pages") as Ref;
					if (root_pages_ref == null) throw new Exception(string.Format("pages not found or invalid in {0}.", string.Join(" ", root_obj.objname)));
					int root_pages_id = int.Parse(root_pages_ref.reference[0]);

					Ref[] pages_ref = TraversePages(fs, root_pages_ref, xref);

					if (pages == null) pages = Enumerable.Range(0, pages_ref.Length).ToArray();
					foreach (int p in pages) {
						if (p < 0 || p >= pages_ref.Length) {
							throw new ArgumentOutOfRangeException("pages", string.Format("page index {0} is out of range (page count: {1}).", p, pages_ref.Length));
						}
					}
					string[] splitted_pages = pages.SelectMany(p => CreateSplittedPages(fs, xref, pages_ref[p], xdiv, ydiv, margin)).ToArray();
					long[] pos_splitted_pages = new long[splitted_pages.Length];

					int last_ref = xref.xref_lines.Count;

					// Start from the original root Pages dictionary so attributes that pages
					// may inherit from it (anything besides /Type, /Kids, /Count) survive.
					xref.JumpToObject(fs, root_pages_ref.reference);
					Dict old_pages_dict = GetObject(fs).item as Dict;
					if (old_pages_dict == null) throw new Exception("root pages object is not dictionary.");
					Dict new_pages_dict = old_pages_dict.ShallowCopy();
					Ary new_kids = new Ary();
					new_kids.ary = Enumerable.Range(last_ref, splitted_pages.Length)
						.Select(id => (Item)new Ref { reference = new[] { id.ToString(), "0", "R" } })
						.ToArray();
					new_pages_dict.SetValue("/Type", new Value("/Pages"));
					new_pages_dict.SetValue("/Kids", new_kids);
					new_pages_dict.SetValue("/Count", new Value(splitted_pages.Length.ToString()));

					fs.Seek(0, SeekOrigin.End);

					// Append the new page objects.
					for (int i = 0; i < splitted_pages.Length; ++i) {
						pos_splitted_pages[i] = fs.Position;
						string obj = string.Format("{0} 0 obj\n", i + last_ref) + splitted_pages[i] + "\nendobj\n";
						Write(fs, obj);
					}

					// Append the rebuilt root Pages object under its original object number.
					long pos_pages = fs.Position;
					Write(fs, string.Format("{0} 0 obj\n", root_pages_id) + new_pages_dict.Serialize() + "endobj\n");

					// Rewrite the xref table. Every entry is written as exactly 20 bytes
					// ("nnnnnnnnnn ggggg n" + space + LF), the fixed width the PDF
					// cross-reference table format requires.
					long pos_xref = fs.Position;
					Write(fs, "xref\n");
					Write(fs, string.Format("{0} {1:D}\n", 0, xref.xref_lines.Count + splitted_pages.Length));
					for (int i = 0; i < xref.xref_lines.Count; ++i) {
						if (i == root_pages_id) {
							Write(fs, string.Format("{0:D10} 00000 n \n", pos_pages));
						} else if (string.IsNullOrEmpty(xref.xref_lines[i])) {
							// Object number never defined in the source: emit a free entry
							// instead of a blank line.
							Write(fs, "0000000000 65535 f \n");
						} else {
							Write(fs, xref.xref_lines[i].TrimEnd() + " \n");
						}
					}
					for (int i = 0; i < pos_splitted_pages.Length; ++i) {
						Write(fs, string.Format("{0:D10} 00000 n \n", pos_splitted_pages[i]));
					}

					// Rewrite the trailer: the table above is complete, so /Prev is dropped.
					Dict new_dict_trailer = ((Dict)trailer.item).ShallowCopy();
					new_dict_trailer.SetValue("/Size", new Value((xref.xref_lines.Count + splitted_pages.Length).ToString()));
					new_dict_trailer.RemoveKV("/Prev");
					Write(fs, "trailer\n");
					Write(fs, new_dict_trailer.Serialize());

					// File footer.
					Write(fs, string.Format("startxref\n{0}\n%%EOF\n", pos_xref));
				}
			} catch {
				// Best-effort cleanup of the output. A pre-existing file is left alone
				// when the copy itself failed. Cleanup errors must not hide the original one.
				if (copied || !existed_before) {
					try {
						if (File.Exists(new_pdffilepath)) File.Delete(new_pdffilepath);
					} catch (IOException) {
					} catch (UnauthorizedAccessException) {
					}
				}
				throw; // preserves the original stack trace
			}
		}
		#endregion

		/// <summary>The application's main window instance; assigned in <c>Main</c>.</summary>
		public static MainForm MainForm { get; set; }
		/// <summary>
		/// Entry point: shows the main form (passing the first argument, if any) and,
		/// once it is closed and an argument was given, runs a fixed split of that file.
		/// </summary>
		/// <param name="args">Command line arguments; args[0] is the PDF path when present.</param>
		[STAThread]
		static void Main(string[] args) {
			MainForm = new MainForm(args.Length > 0 ? args[0] : null);
			Application.Run(MainForm);
			if (args.Length < 1) return;

			// NOTE(review): everything below runs after the window closes, with
			// hard-coded pages {0, 1} and a 3x3 split, then waits on the console.
			// It looks like leftover test code - confirm whether it is still wanted.
			string src_path = args[0];

			// Open the file given on the command line.
			using (FileStream fs = new FileStream(src_path, FileMode.Open, FileAccess.Read)) {
				long filesize, startxref;
				Obj trailer, root_obj;
				Xref xref;
				bool found = FindPDFStructure(fs, out filesize, out startxref, out xref, out trailer, out root_obj);
				if (!found) {
					throw new Exception("failed to find PDF structure.");
				}

				// Locate the root Pages node.
				Ref root_pages_ref = ((Dict)root_obj.item).GetValue("/Pages") as Ref;
				if (root_pages_ref == null) throw new Exception(string.Format("pages not found or invalid in {0}.", string.Join(" ", root_obj.objname)));

				// The result is not used; the call only checks that the page tree can be walked.
				TraversePages(fs, root_pages_ref, xref);
			}

			CreateSplittedPDF(src_path, src_path + "_split.pdf", new int[] { 0, 1 }, 3, 3, 0.05);

			Console.WriteLine("Hello World!!");
			Console.ReadLine();
		}
	}
}
