/*
 * This file is part of Nuts Framework.
 * Copyright(C) 2009-2012 Nuts Develop Team.
 *
 * Nuts Framework is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or any later version.
 * 
 * Nuts Framework is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Nuts Framework. If not, see <http://www.gnu.org/licenses/>.
 */
package nuts.tools.poi.doc;

import nuts.core.bean.Beans;
import nuts.core.bind.xml.Xmls;
import nuts.core.lang.i18n.Charsets;
import nuts.tools.poi.ESummary;

import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.LinkedHashMap;
import java.util.Map;

import javax.xml.transform.TransformerException;

import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;

/**
 * Collects the content of a Word document ({@link HWPFDocument}) into an
 * {@link EDocument} model and serializes that model as XML.
 * <p>
 * The document traversal is performed by the inherited <code>process(doc)</code>
 * method; this class supplies the <code>handleXxx</code> callbacks that build the
 * model: one map of paragraphs per accepted range, and one map of character runs
 * per paragraph, both keyed by the index passed to the callback.
 * <p>
 * The model under construction is kept in instance fields, so a single instance
 * should not be used to process several documents concurrently.
 */
public class DocTextExtractor extends DocTextProcessor {
	/** Range name that is skipped unless {@link #isExtractHeader()} is true. */
	private static final String RANGE_HEADER = "header";

	/**
	 * Range name that is skipped unless {@link #isExtractFooter()} is true.
	 * NOTE(review): assumed by symmetry with "header" - confirm the exact name
	 * that DocTextProcessor passes to handleRange for footers.
	 */
	private static final String RANGE_FOOTER = "footer";

	private boolean extractSummary = false;
	private boolean extractHeader = false;
	private boolean extractFooter = false;
	
	/** model of the document currently being processed */
	private EDocument edoc;
	/** paragraphs of the range currently being processed, keyed by paragraph index */
	private Map<Integer, EParagraph> erange;
	/** paragraph currently being processed */
	private EParagraph eparagraph;
	
	/**
	 * Constructor
	 */
	public DocTextExtractor() {
	}

	/**
	 * @return true if the document summary information is copied into the result
	 */
	public boolean isExtractSummary() {
		return extractSummary;
	}

	/**
	 * @param extractSummary true to copy the document summary information into the result
	 */
	public void setExtractSummary(boolean extractSummary) {
		this.extractSummary = extractSummary;
	}

	/**
	 * @return true if the "header" range is included in the result
	 */
	public boolean isExtractHeader() {
		return extractHeader;
	}

	/**
	 * @param extractHeader true to include the "header" range in the result
	 */
	public void setExtractHeader(boolean extractHeader) {
		this.extractHeader = extractHeader;
	}

	/**
	 * @return true if the "footer" range is included in the result
	 */
	public boolean isExtractFooter() {
		return extractFooter;
	}

	/**
	 * @param extractFooter true to include the "footer" range in the result
	 */
	public void setExtractFooter(boolean extractFooter) {
		this.extractFooter = extractFooter;
	}

	/**
	 * Processes the document and writes the resulting XML to a byte stream,
	 * encoded with {@link Charsets#CS_UTF_8}.
	 * <p>
	 * The stream is flushed but not closed; closing it stays the caller's job.
	 * 
	 * @param doc the document to extract
	 * @param output the stream that receives the XML
	 * @throws TransformerException if the XML serialization fails, or if flushing
	 *             the encoded output to the stream fails
	 * @throws UnsupportedEncodingException if the output encoding is not supported
	 */
	public void extractToXml(HWPFDocument doc, OutputStream output) throws TransformerException, UnsupportedEncodingException {
		OutputStreamWriter osw = new OutputStreamWriter(output, Charsets.CS_UTF_8);

		extractToXml(doc, osw);

		// The writer buffers encoded bytes internally and is local to this method:
		// without an explicit flush the tail of the XML may never reach the stream.
		try {
			osw.flush();
		}
		catch (IOException e) {
			// keep the declared exception types unchanged for existing callers
			throw new TransformerException(e);
		}
	}

	/**
	 * Processes the document and writes the resulting XML to a writer.
	 * 
	 * @param doc the document to extract
	 * @param output the writer that receives the XML
	 * @throws TransformerException if the XML serialization fails
	 */
	public void extractToXml(HWPFDocument doc, Writer output) throws TransformerException {
		process(doc);

		Xmls.toXml(edoc, output, true);
	}

	/**
	 * Starts a fresh result model for the document and, when summary extraction
	 * is enabled and the document has summary information, copies it into the model.
	 * 
	 * @param doc the document being processed
	 */
	@Override
	protected void handleDocument(HWPFDocument doc) {
		// reset all state left over from a previous run
		edoc = new EDocument();
		erange = null;
		eparagraph = null;

		if (extractSummary) {
			final SummaryInformation si = doc.getSummaryInformation();
			if (si != null) {
				ESummary es = new ESummary();
				es.copy(si);
				edoc.setSummary(es);
			}
		}
	}

	/**
	 * Decides whether a named range is extracted. An accepted range gets a new,
	 * insertion-ordered paragraph map which is stored on the result model under
	 * the property called <code>name</code>.
	 * 
	 * @param name the range name; "header" and "footer" are subject to the
	 *            corresponding extract flags, any other name is always accepted
	 * @param doc the document being processed
	 * @param range the range being processed
	 * @return true if the range was accepted, false if it is excluded by the
	 *         header/footer flags (presumably telling the processor to skip it)
	 */
	@Override
	protected boolean handleRange(String name, HWPFDocument doc, Range range) {
		if (!extractHeader && RANGE_HEADER.equals(name)) {
			return false;
		}
		// Previously the extractFooter flag was never consulted, so the setter had no effect.
		if (!extractFooter && RANGE_FOOTER.equals(name)) {
			return false;
		}

		erange = new LinkedHashMap<Integer, EParagraph>();
		Beans.setProperty(edoc, name, erange);
		return true;
	}

	/**
	 * Registers a new, empty paragraph in the current range under the given index.
	 * 
	 * @param range the range being processed
	 * @param paragraph the paragraph being processed
	 * @param index the key under which the paragraph is stored
	 * @return always true
	 */
	@Override
	protected boolean handleParagraph(Range range, Paragraph paragraph, int index) {
		eparagraph = new EParagraph();
		erange.put(index, eparagraph);
		return true;
	}

	/**
	 * Stores the character run in the current paragraph under the given index.
	 * 
	 * @param range the range being processed
	 * @param paragraph the paragraph being processed
	 * @param crun the source character run
	 * @param index the key under which the run is stored
	 * @param ecrun the extracted character run to store
	 * @return always false
	 */
	@Override
	protected boolean handleText(Range range, Paragraph paragraph, CharacterRun crun, int index, ECharRun ecrun) {
		eparagraph.getCruns().put(index, ecrun);
		return false;
	}
}
