 /* ------------------------------
  * CSVTokenizer.java
  * ------------------------------
  * (C)opyright 2003, abupon (Manabu Hashimoto)
  * This class is based on the CSV tokenizer found at
  * http://sourceforge.net/projects/csvtokenizer/
   */
 
package jp.riken.brain.ni.samuraigraph.base;

import java.util.ArrayList;
import java.util.Enumeration;
import java.util.NoSuchElementException;

/**
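 * The csv tokenizer class allows an application to break one record (line) of
 * Comma Separated Value or whitespace separated text into tokens. The
 * tokenization method is much simpler than the one used by the
 * <code>StringTokenizer</code> class. The <code>SGCSVTokenizer</code> methods
 * do not distinguish between identifiers and numbers; every token is returned
 * as a string. A field enclosed in double quotes is returned without the
 * enclosing quotes, and a record starting with <code>#</code> yields no tokens
 * when the tokenizer is created with the data-file flag set.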
 * <p>
 * The separator (the character that separates tokens) is chosen automatically
 * at creation time: a comma if the record contains a comma outside double
 * quotes, otherwise runs of whitespace characters.
 * <p>
 * An <tt>SGCSVTokenizer</tt> object internally maintains a current position
 * within the string to be tokenized. Some operations advance this current
 * position past the characters processed.
 * <p>
 * A token is returned by taking a substring of the string that was used to
 * create the <tt>SGCSVTokenizer</tt> object.
 * <p>
 * The following is one example of the use of the tokenizer. The code:
 * <blockquote>
 * 
 * <pre>
 * SGCSVTokenizer csvt = new SGCSVTokenizer(&quot;this,is,a,test&quot;, false);
 * while (csvt.hasMoreTokens()) {
 * 	println(csvt.nextToken());
 * }
 * </pre>
 * 
 * </blockquote>
 * <p>
 * prints the following output: <blockquote>
 * 
 * <pre>
 * 
 *      this
 *      is
 *      a
 *      test
 *  
 * </pre>
 * 
 * </blockquote>
 * 
 * @author abupon
 * @version
 * @see
 * @since
 */
public class SGCSVTokenizer implements Enumeration {

	/** Character that opens and closes a quoted field. */
	private static final String DOUBLE_QUOTE = "\"";

	/**
	 * Characters that separate tokens when the record is not comma separated.
	 * The comma is part of the set, so a comma that was not recognized as a
	 * field separator still ends a token in this mode.
	 */
	private static final String WHITE_SPACE = " \t\n\r\f,";

	/** Field separator used when the record is comma separated. */
	private static final String SEPARATOR_COMMA = ",";

	private static final int SEPARATOR_LEN = 1;
	private static final int DOUBLE_QUOTE_LEN = 1;

	/** The trimmed text being tokenized. */
	private final String record;

	/** Index of the next token in {@link #record}, or -1 when exhausted. */
	private int currentIndex;

	/** True when the record contains a comma outside double quotes. */
	private final boolean csvMode;

	/** True when the record is a data-file comment line (starts with '#'). */
	private final boolean commentLine;

	/**
	 * Constructs a tokenizer for the specified string. The string is trimmed
	 * first. If it contains at least one comma outside double quotes, the
	 * comma is used as the field separator; otherwise tokens are separated by
	 * runs of whitespace.
	 * 
	 * @param aString
	 *            a string to be parsed; must not be <code>null</code>.
	 * @param isDataFile
	 *            a data file reading flag. When <code>true</code>, a string
	 *            starting with <code>#</code> is treated as a comment line
	 *            and yields no tokens.
	 */
	public SGCSVTokenizer(final String aString, final boolean isDataFile) {
		this.record = aString.trim();
		this.currentIndex = 0;
		this.commentLine = isDataFile && this.record.startsWith("#");
		this.csvMode = containsUnquotedComma(this.record);
	}

	/**
	 * Tells whether the text contains a comma that is not enclosed in double
	 * quotes. Every double quote character toggles the "inside quotes" state.
	 */
	private static boolean containsUnquotedComma(final String text) {
		boolean inQuote = false;
		for (int i = 0; i < text.length(); i++) {
			final char c = text.charAt(i);
			if (c == '"') {
				inQuote = !inQuote;
			} else if (c == ',' && !inQuote) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Tests if there are more tokens available from this tokenizer's string. If
	 * this method returns <tt>true</tt>, then a subsequent call to
	 * <tt>nextToken</tt> will return a token unless the remaining text is
	 * malformed. A comment line never has tokens.
	 * 
	 * @return <code>true</code> if and only if there is at least one token in
	 *         the string after the current position; <code>false</code>
	 *         otherwise.
	 */
	public boolean hasMoreTokens() {
		if (this.commentLine) {
			return false;
		}
		return this.currentIndex >= 0;
	}

	/**
	 * Returns the next token from this tokenizer. A token that starts with a
	 * double quote at the current position is read up to its closing quote,
	 * with a doubled quote (<code>""</code>) standing for one literal quote;
	 * it is returned without the enclosing quotes and is not trimmed. Any
	 * other token is returned trimmed.
	 * 
	 * @return the next token from this tokenizer.
	 * @exception NoSuchElementException
	 *                if there are no more tokens in this tokenizer's string.
	 * @exception IllegalArgumentException
	 *                if a quoted token has no closing double quote.
	 */
	public String nextToken() throws NoSuchElementException, IllegalArgumentException {
		if (!this.hasMoreTokens()) {
			throw new NoSuchElementException();
		}
		if (this.record.startsWith(DOUBLE_QUOTE, this.currentIndex)) {
			return nextQuotedToken();
		}
		return nextPlainToken();
	}

	/**
	 * Reads the quoted token whose opening quote is at the current position
	 * and advances past its closing quote and the following separator.
	 */
	private String nextQuotedToken() {
		String token = "";
		int pos = this.currentIndex + DOUBLE_QUOTE_LEN;
		int close;
		for (;;) {
			close = this.record.indexOf(DOUBLE_QUOTE, pos);
			if (close < 0) {
				throw new IllegalArgumentException("Illegal format");
			}
			if (!this.record.startsWith(DOUBLE_QUOTE, close + DOUBLE_QUOTE_LEN)) {
				// a lone quote closes the token
				token = token + this.record.substring(pos, close);
				break;
			}
			// a doubled quote is an escaped quote: keep one, skip both
			token = token + this.record.substring(pos, close + DOUBLE_QUOTE_LEN);
			pos = close + DOUBLE_QUOTE_LEN * 2;
		}

		// The character right after the closing quote is taken to be the
		// separator and is skipped without being inspected.
		int next = close + DOUBLE_QUOTE_LEN + SEPARATOR_LEN;
		if (!this.csvMode) {
			next = nextTokenIndexOf(next);
		}

		final int len = this.record.length();
		// In comma mode, a comma as the very last character means one more
		// (empty) field follows, exactly as for an unquoted "a," record.
		final boolean trailingEmptyField = this.csvMode && next == len
				&& this.record.charAt(close + DOUBLE_QUOTE_LEN) == ',';
		if (next >= len && !trailingEmptyField) {
			next = -1;
		}
		this.currentIndex = next;
		return token;
	}

	/**
	 * Reads the unquoted token starting at the current position, advances
	 * past the separator that ends it and returns the token trimmed.
	 */
	private String nextPlainToken() {
		final int start = this.currentIndex;
		final int end;
		if (this.csvMode) {
			end = this.record.indexOf(SEPARATOR_COMMA, start);
		} else {
			end = nextSeparatorIndexOf(start);
		}

		final String token;
		if (end >= 0) {
			token = this.record.substring(start, end);
			if (this.csvMode) {
				// may equal the record length: a trailing empty field remains
				this.currentIndex = end + SEPARATOR_LEN;
			} else {
				final int next = nextTokenIndexOf(end);
				this.currentIndex = (next == this.record.length()) ? -1 : next;
			}
		} else {
			// end of line reached; substring yields "" when start is the length
			token = this.record.substring(start);
			this.currentIndex = -1;
		}
		return token.trim();
	}

	/** Tells whether the character separates tokens in whitespace mode. */
	private static boolean isSeparatorChar(final char c) {
		return WHITE_SPACE.indexOf(c) >= 0;
	}

	/**
	 * Returns the index of the first separator character at or after
	 * <code>fromIndex</code>, or -1 if the rest of the record has none.
	 */
	private int nextSeparatorIndexOf(int fromIndex) {
		final int len = this.record.length();
		for (int i = fromIndex; i < len; i++) {
			if (isSeparatorChar(this.record.charAt(i))) {
				return i;
			}
		}
		return -1;
	}

	/**
	 * Returns the index of the first non-separator character at or after
	 * <code>fromIndex</code>. If only separators remain, the record length is
	 * returned; if <code>fromIndex</code> is already past the end, it is
	 * returned unchanged.
	 */
	private int nextTokenIndexOf(int fromIndex) {
		final int len = this.record.length();
		int i = fromIndex;
		while (i < len && isSeparatorChar(this.record.charAt(i))) {
			i++;
		}
		return i;
	}

	/**
	 * Returns the same value as the <code>hasMoreTokens</code> method. It
	 * exists so that this class can implement the <code>Enumeration</code>
	 * interface.
	 * 
	 * @return <code>true</code> if there are more tokens; <code>false</code>
	 *         otherwise.
	 * @see java.util.Enumeration
	 * @see #hasMoreTokens()
	 */
	public boolean hasMoreElements() {
		return hasMoreTokens();
	}

	/**
	 * Returns the same value as the <code>nextToken</code> method, except
	 * that its declared return value is <code>Object</code> rather than
	 * <code>String</code>. It exists so that this class can implement the
	 * <code>Enumeration</code> interface.
	 * 
	 * @return the next token in the string.
	 * @exception NoSuchElementException
	 *                if there are no more tokens in this tokenizer's string.
	 * @see java.util.Enumeration
	 * @see #nextToken()
	 */
	public Object nextElement() {
		return nextToken();
	}

	/**
	 * Small manual demonstration: tokenizes a sample record and prints each
	 * token with its 1-based position.
	 * 
	 * @param args
	 *            ignored.
	 */
	public static void main(String[] args) {
		final String str = "1, \t 2, \"\" 3, b 4, a hoge";
		System.out.println("String : [" + str + "]");
		final SGCSVTokenizer csvt = new SGCSVTokenizer(str, true);
		int i = 1;
		while (csvt.hasMoreTokens()) {
			try {
				final String result = csvt.nextToken();
				System.out.println(i + ": [" + result + "]");
				i++;
			} catch (NoSuchElementException e) {
				e.printStackTrace();
				System.exit(-1);
			}
		}
	}

}
