/*
 *	Qizx/Open version 0.3
 *
 *	Copyright (c) 2003-2004 Xavier C. FRANC -- All rights reserved.
 *
 *	This program is free software; you can redistribute it  and/or
 *	modify it under the terms of the GNU General Public License as
 *	published by the Free Software Foundation (see LICENSE.txt).
 */

package net.xfra.qizxopen.xquery;

import net.xfra.qizxopen.util.Util;
import net.xfra.qizxopen.xquery.dm.*;
import net.xfra.qizxopen.dm.IDocument;

import java.io.InputStream;
import java.io.File;
import java.io.IOException;
import java.io.FileNotFoundException;
import java.util.Vector;
import java.net.URL;
import org.xml.sax.InputSource;
import org.xml.sax.EntityResolver;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import javax.xml.parsers.*;

import com.sun.resolver.Catalog;
import com.sun.resolver.CatalogManager;
import com.sun.resolver.tools.CatalogResolver;

/**
 *  Provides access to the data-model of XML documents.
 *  <p>Resolves the URI of a document and parses it or opens it according to its
 *  representation: the open-source version can only parse source XML documents; the
 *  full-blown implementation has access to compressed persistent randomly accessible
 *  representations called ZIPDocuments.
 *  <p>Parsed documents are managed in a cache with configurable size
 *  (see {@link #setCacheSize}). The cache is kept in most-recently-used order and
 *  is trimmed from the tail whenever a document is added.
 *  <p>Supports XML catalogs through
 *  Norman Walsh's <code>com.sun.resolver.tools.CatalogResolver</code>, when that
 *  class can be loaded.
 */
public class DocumentManager
{
    /** Factory used to create one namespace-aware SAX parser per parsed document. */
    protected SAXParserFactory factory;
    /** Base location against which relative document URIs are resolved. */
    protected URL baseURL;
    /** Catalog-based resolver, or null if the catalog classes could not be loaded. */
    protected EntityResolver entityResolver;

    /** Lower bound enforced by {@link #setCacheSize}. */
    final static int MIN_CACHE_SIZE = 128 * 1024;
    private final static String CACHE_SIZE_PROP = "qizx.docman.cachesize";
    private long cacheSize = 8 * 1048576;  // 8Mb default size, configurable by sys prop
    /** Cached FONIDataModel instances, most recently used first. */
    private Vector cache = new Vector();

    /**
     *	Creation with a single base location.
     *	@param baseURL the base URL used for resolving a relative document URI.
     *	For parsed documents, any Java-supported URL is suitable.
     */
    public DocumentManager( URL baseURL ) {
        init(baseURL);
    }

    /**
     *	Creation with a single base location.
     *	@param baseURI the base location, converted to a URL by
     *	<code>Util.uriToURL</code>.
     *	@throws IOException if the URI cannot be converted to a URL.
     */
    public DocumentManager( String baseURI ) throws IOException {
        init( Util.uriToURL(baseURI) );
    }

    /**
     *	Returns the current baseURI.
     */
    public String getBaseURI() {
        return baseURL.toString();
    }

    /**
     *	Defines the maximal memory size of the document cache. This size is otherwise
     *	defined by the system property "qizx.docman.cachesize".
     *	Values below MIN_CACHE_SIZE are raised to that minimum.
     *	@param size in bytes (hint).
     */
    public void setCacheSize( long size ) {
        cacheSize = Math.max( size, MIN_CACHE_SIZE );
    }

    /**
     *	Common constructor code: stores the base URL, creates the parser factory,
     *	tries to install a catalog resolver and reads the cache size property.
     */
    private void init( URL baseURL ) {
        this.baseURL = baseURL;
        factory = SAXParserFactory.newInstance();
        factory.setNamespaceAware(true);

        // an absurdity in Crimson:
        // (note that this is a JVM-wide system property, not a per-instance setting)
        System.setProperty("entityExpansionLimit", "1000000000");

        try {
            // Loaded by name so that a missing resolver library is reported as a
            // catchable exception here.
            Class cls = Class.forName("com.sun.resolver.tools.CatalogResolver");
            CatalogManager.ignoreMissingProperties(true);

            entityResolver = (EntityResolver) cls.newInstance();
        }
        catch (Exception e) {
            // Deliberately silent: catalog support is optional. entityResolver stays
            // null and parseDocument falls back to the IDocument resolver.
        }

        String sysp = System.getProperty(CACHE_SIZE_PROP);
        if (sysp != null) {
            try {
                setCacheSize( Long.parseLong(sysp) );
            }
            catch (NumberFormatException ignored) {
                // malformed property value: keep the default cache size
            }
        }
    }

    /**
     *	Looks up a document in the cache by its URI.
     *	A hit is moved to the head of the cache (most recently used).
     *	@param uri the document URI, compared with <code>getDocumentURI()</code> of
     *	each cached document.
     *	@return the cached document, or null if there is none for this URI.
     */
    protected synchronized FONIDataModel getCachedDocument(String uri) {
        // linear search: never mind!
        for (int d = 0, D = cache.size(); d < D; d++) {
            FONIDataModel doc = (FONIDataModel) cache.get(d);
            if (uri.equals(doc.getDocumentURI())) {
                // put at head:
                cache.remove(d);
                cache.insertElementAt(doc, 0);
                return doc;
            }
        }
        return null;
    }

    /**
     *	Puts a document at the head of the cache (unless already present), then
     *	drops documents from the tail so that the estimated total size
     *	stays within the configured cache size.
     *	<p>Note that a head document whose own estimated size exceeds the cache
     *	size empties the cache entirely.
     *	@param doc the document to cache.
     */
    protected synchronized void cacheDocument(FONIDataModel doc) {
        if (!cache.contains(doc))
            cache.insertElementAt(doc, 0);
        // Accumulate in a long: cacheSize is a long, and an int total could
        // overflow with a large cache and never trigger the eviction.
        long cumulatedSize = 0;
        for (int d = 0, D = cache.size(); d < D; d++) {
            FONIDataModel doc2 = (FONIDataModel) cache.get(d);
            int size = doc2.estimateMemorySize();
            if (cumulatedSize + size > cacheSize) {
                // truncate: this document and all less recently used ones go
                cache.setSize(d);
                break;
            }
            cumulatedSize += size;
        }
    }

    /**
     *	Overridable method for resolving a document URI to an actual URL.
     *	This implementation resolves the URI relatively to the base URL.
     */
    protected URL resolveLocation( String uri ) throws IOException {
        return new URL(baseURL, uri);
    }

    /**
     *	Cached access by URI.
     *	The URI is resolved by {@link #resolveLocation}; the document is taken from
     *	the cache if present, otherwise parsed. In both cases it is then (re)cached.
     *	@param uri the document URI, possibly relative to the base URI.
     *	@return the document node of the located document.
     *	@throws EvalException on an XML parsing error, an IO error, or if no
     *	document could be obtained.
     */
    public Node findDocument( String uri ) throws EvalException {
        try {
            URL rloc = resolveLocation(uri);
            FONIDataModel dm = getCachedDocument(rloc.toString());
            if (dm == null) {
                try {
                    dm = parseDocument(rloc);
                }
                catch (org.xml.sax.SAXException sax) {
                    // Prefer the embedded exception as cause, but never drop the
                    // cause altogether: plain parse errors embed nothing.
                    Exception cause = sax.getException();
                    throw new EvalException( "XML parsing error in "+ uri +": "+
                                             sax.getMessage(),
                                             cause != null ? cause : sax );
                }
            }
            // parseDocument is overridable, so still guard against a null result.
            if (dm == null)
                throw new EvalException(uri+" (document cannot be located)");
            cacheDocument(dm);
            return dm.getDocumentNode();
        }
        catch (IOException io) {
            throw new EvalException("document IO error on: "+ uri, io);
        }
    }

    final static String SAXLEX = "http://xml.org/sax/properties/lexical-handler";

    /**
     *	Simple document parsing (no caching). Helper method.
     *	Opens the URL, parses its contents with the URL as system id, and closes
     *	the stream whatever the outcome.
     *	@param url location of the XML document.
     *	@return the parsed document.
     */
    public FONIDataModel parseDocument( URL url )
        throws org.xml.sax.SAXException, IOException
    {
        InputStream in = url.openStream();
        try {
            InputSource source = new InputSource(in);
            source.setSystemId(url.toExternalForm());
            return parseDocument(source);
        }
        finally {
            in.close();
        }
    }

    /**
     *	Simple document parsing (no caching). Helper method.
     *	@param input the SAX input source to parse.
     *	@return the parsed document (never null).
     *	@throws org.xml.sax.SAXException on a parsing error, or if a SAX parser
     *	cannot be created (the ParserConfigurationException is then the embedded
     *	exception).
     */
    public FONIDataModel parseDocument( InputSource input )
        throws org.xml.sax.SAXException, IOException
    {
        SAXParser saxParser;
        try {
            // Create a JAXP SAXParser
            saxParser = factory.newSAXParser();
        }
        catch (ParserConfigurationException pce) {
            // Report the failure instead of printing it and returning null,
            // which callers would misinterpret as a missing document.
            throw new org.xml.sax.SAXException(
                "cannot create XML parser: " + pce.getMessage(), pce);
        }

        XMLReader reader = saxParser.getXMLReader();
        IDocument idoc = new IDocument();
        try {
            reader.setProperty(SAXLEX, idoc);
        }
        catch (Exception lex) {
            // best effort: parsing goes on without a lexical handler
            System.err.println("*** lexical-handler: "+lex);
        }
        // The catalog resolver, when available, takes precedence.
        if (entityResolver != null)
            reader.setEntityResolver(entityResolver);
        else
            reader.setEntityResolver(idoc);
        reader.setDTDHandler(idoc);
        reader.setContentHandler(idoc);
        reader.setErrorHandler(idoc);

        reader.parse(input);

        return new FONIDataModel(idoc);
    }
}
