2006-11-13 21:54  orrisroot

	* SGCSVTokenizer.java: formatted lines.  added NON-NLS-1 tag.

2006-02-20 23:54  orrisroot

	* SGCSVTokenizer.java: added 'this.' prefix.

2005-12-05 23:12  orrisroot

	* SGCSVTokenizer.java: implemented new feature of axis title
	  setting at new figure creation.

2005-08-17 13:19  orrisroot

	* SGCSVTokenizer.java: removed ineffective else statement

2005-08-12 20:45  orrisroot

	* SGCSVTokenizer.java: added new feature for accepting comment
	  lines in data file reading.

2005-07-12 17:13  orrisroot

	* SGCSVTokenizer.java: changed original copyrights.

2005-07-12 13:59  orrisroot

	* SGCSVTokenizer.java: added last comma field check in csv mode.

2005-07-09 23:43  orrisroot

	* SGCSVTokenizer.java: added TODO comment.  removed unused
	  variable.

2005-07-09 22:48  orrisroot

	* SGCSVTokenizer.java: removed wrong comments.

2005-07-09 22:44  orrisroot

	* SGCSVTokenizer.java: clean up source code.

2005-07-07 22:17  orrisroot

	* SGCSVTokenizer.java: fixed data file format check routine.

2005-07-07 10:06  orrisroot

	* SGCSVTokenizer.java: supported CSV strings format.

--- CSVTokenizer.java	2008-01-15 12:46:26.220566000 +0900
+++ SGCSVTokenizer.java	2008-01-15 12:46:26.271566000 +0900
@@ -1,308 +1,293 @@
-package org.arhyme.csv;
-
-import java.util.*;
+/* ------------------------------
+ * CSVTokenizer.java
+ * ------------------------------
+ * (C)opyright 2003, abupon (Manabu Hashimoto)
+ * This class is based on the CSV tokenizer found at
+ * http://sourceforge.net/projects/csvtokenizer/
+ */
+
+package jp.riken.brain.ni.samuraigraph.base;
+
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.NoSuchElementException;
 
 /**
- * The csv tokenizer class allows an application to break a 
- * Comma Separated Value format into tokens. 
- * The tokenization method is much simpler than 
- * the one used by the <code>StringTokenizer</code> class. The 
- * <code>CSVTokenizer</code> methods do not distinguish among 
- * identifiers, numbers, and quoted strings, nor do they recognize 
- * and skip comments. 
+ * The csv tokenizer class allows an application to break a Comma Separated
+ * Value format into tokens. The tokenization method is much simpler than the
+ * one used by the <code>StringTokenizer</code> class. The
+ * <code>CSVTokenizer</code> methods do not distinguish among identifiers,
+ * numbers, and quoted strings, nor do they recognize and skip comments.
+ * <p>
+ * The set of separator (the characters that separate tokens) may be specified
+ * either at creation time or on a per-token basis.
  * <p>
- * The set of separator (the characters that separate tokens) may 
- * be specified either at creation time or on a per-token basis. 
+ * A <tt>CSVTokenizer</tt> object internally maintains a current position
+ * within the string to be tokenized. Some operations advance this current
+ * position past the characters processed.
  * <p>
- * An instance of <code>CSVTokenizer</code> behaves in one of two 
- * ways, depending on whether it was created with the 
- * <code>returnSeparators</code> flag having the value <code>true</code> 
- * or <code>false</code>: 
- * <ul>
- * <li>If the flag is <code>false</code>, delimiter characters serve to 
- *     separate tokens. A token is a maximal sequence of consecutive 
- *     characters that are not separator. 
- * <li>If the flag is <code>true</code>, delimiter characters are themselves 
- *     considered to be tokens. A token is thus either one delimiter 
- *     character, or a maximal sequence of consecutive characters that are 
- *     not separator.
- * </ul><p>
- * A <tt>CSVTokenizer</tt> object internally maintains a current 
- * position within the string to be tokenized. Some operations advance this 
- * current position past the characters processed.<p>
- * A token is returned by taking a substring of the string that was used to 
+ * A token is returned by taking a substring of the string that was used to
  * create the <tt>CSVTokenizer</tt> object.
  * <p>
  * The following is one example of the use of the tokenizer. The code:
- * <blockquote><pre>
- *     CSVTokenizer csvt = new CSVTokenizer("this,is,a,test");
- *     while (csvt.hasMoreTokens()) {
- *         println(csvt.nextToken());
- *     }
- * </pre></blockquote>
+ * <blockquote>
+ * 
+ * <pre>
+ * CSVTokenizer csvt = new CSVTokenizer(&quot;this,is,a,test&quot;);
+ * while (csvt.hasMoreTokens()) {
+ * 	println(csvt.nextToken());
+ * }
+ * </pre>
+ * 
+ * </blockquote>
  * <p>
- * prints the following output:
- * <blockquote><pre>
- *     this
- *     is
- *     a
- *     test
- * </pre></blockquote>
- * @author  abupon
+ * prints the following output: <blockquote>
+ * 
+ * <pre>
+ *  
+ *       this
+ *       is
+ *       a
+ *       test
+ *   
+ * </pre>
+ * 
+ * </blockquote>
+ * 
+ * @author abupon
  * @version
- * @see     
- * @since   
-*/
-public class CSVTokenizer implements Enumeration {
+ * @see
+ * @since
+ */
+public class SGCSVTokenizer implements Enumeration {
+
 	private String record;
-	private String separator;
-	private String quate;
-	private boolean returnSeparators;
 
 	private int currentIndex;
 
-	private static final String COMMA = ",";
-	private static final String TAB = "\t";
-	private static final String SPACE = " ";
+	private static final String DOUBLE_QUATE = "\""; //$NON-NLS-1$
 
-	private static final String DOUBLE_QUATE = "\"";
-	private static final String SINGLE_QUATE = "'";
+	private static final String WHITE_SPACE = " \t\n\r\f,"; //$NON-NLS-1$
 
-	/**
-	 * Constructs a csv tokenizer for the specified string.   
-	 * <code>theSeparator</code> argument is the separator 
-	 * for separating tokens. 
-	 * <p>
-	 * If the <code>returnSeparators</code> flag is <code>true</code>, 
-	 * then the separator string is also returned as tokens. 
-	 * separator is returned as a string. If the flag is 
-	 * <code>false</code>, the separator string is skipped and only 
-	 * serve as separator between tokens. 
-	 *
-	 * @param	aString			a string to be parsed.
-	 * @param	theSeparator	the separator 
-	 * 							(CSVTokenizer.COMMA, CSVTokenizer.TAB, CSVTokenizer.SPACE, etc.).
-	 * @param	theQuate		the quate 
-	 * 							(CSVTokenizer.SINGLE_QUATE, CSVTokenizer.DOUBLE_QUATE, etc.).
-	 * @param	fragReturnSeparators	flag indicating whether to return the separator
-	 * 							as tokens.
-	 */
-	public CSVTokenizer(
-		String aString,
-		String theSeparator,
-		String theQuate,
-		boolean fragReturnSeparators) {
-		this.record = aString.trim();
-		this.separator = theSeparator;
-		this.quate = theQuate;
-		this.returnSeparators = fragReturnSeparators;
-		this.currentIndex = 0;
-	}
+	private static final String SEPARATOR_COMMA = ","; //$NON-NLS-1$
 
-	/**
-	 * Constructs a csv tokenizer for the specified string. All  
-	 * characters in the <code>theSeparator</code> argument are the separator 
-	 * for separating tokens. 
-	 * <p>
-	 * If the <code>returnSeparators</code> flag is <code>true</code>, then 
-	 * the separator string is also returned as tokens.
-	 * separator is returned as a string. If the flag is 
-	 * <code>false</code>, the Separator string is skipped and only 
-	 * serve as separator between tokens. 
-	 *
-	 * @param	aString			a string to be parsed.
-	 * @param	theSeparator	the separator 
-	 * 							(CSVTokenizer.COMMA, CSVTokenizer.TAB, CSVTokenizer.SPACE, etc.).
-	 * @param	fragReturnSeparators	flag indicating whether to return the separator
-	 * 							as tokens.
-	 */
-	public CSVTokenizer(
-		String aString,
-		String theSeparator,
-		boolean fragReturnSeparators) {
-		this(
-			aString,
-			theSeparator,
-			CSVTokenizer.DOUBLE_QUATE,
-			fragReturnSeparators);
-	}
+	private static final int SEPARATOR_LEN = 1;
 
-	/**
-	 * Constructs a csv tokenizer for the specified string. The 
-	 * characters in the <code>theSeparator</code> argument are 
-	 * the separator for separating tokens. 
-	 * Separator string themselves will not be treated as tokens.
-	 * 
-	 * @param	aString			a string to be parsed.
-	 * @param	theSeparator	the separator 
-	 * 							(CSVTokenizer.COMMA, CSVTokenizer.TAB, CSVTokenizer.SPACE, etc.).
-	 */
-	public CSVTokenizer(String aString, String theSeparator) {
-		this(aString, theSeparator, false);
-	}
+	private static final int DOUBLE_QUATE_LEN = 1;
+
+	private ArrayList mWhiteSpaceList = new ArrayList();
+
+	private boolean is_csv_mode = false;
+
+	private boolean is_comment_line = false;
 
 	/**
-	 * Constructs a string tokenizer for the specified string. The 
-	 * tokenizer uses the default separator set, which is 
-	 * <code>CSVTokenizer.COMMA</code>. 
-	 * Separator string themselves will not be treated as tokens.
-	 * 
-	 * @param	aString			a string to be parsed.
+	 * Constructs a csv tokenizer for the specified string. The record is
+	 * split on commas if it has a comma outside double quotes, otherwise on
+	 * white space characters.
 	 * 
+	 * @param aString
+	 *            a string to be parsed.
+	 * @param isDataFile
+	 *            a data file reading flag
 	 */
-	public CSVTokenizer(String aString) {
-		this(aString, CSVTokenizer.COMMA);
+	public SGCSVTokenizer(final String aString, final boolean isDataFile) {
+		this.record = aString.trim();
+		this.currentIndex = 0;
+		for (int ii = 0; ii < WHITE_SPACE.length(); ii++)
+			this.mWhiteSpaceList.add(new Character(WHITE_SPACE.charAt(ii)));
+		// check comment line
+		if (isDataFile && this.record.startsWith("#")) //$NON-NLS-1$
+			this.is_comment_line = true;
+		// check comma separated mode
+		char c;
+		boolean in_quote = false;
+		for (int ii = 0; ii < this.record.length(); ii++) {
+			c = this.record.charAt(ii);
+			if (in_quote) {
+				if (c == '"')
+					in_quote = false;
+			} else {
+				if (c == '"') {
+					in_quote = true;
+				} else if (c == ',') {
+					this.is_csv_mode = true;
+				}
+			}
+		}
 	}
 
 	/**
-	 * Tests if there are more tokens available from this tokenizer's string. 
-	 * If this method returns <tt>true</tt>, then a subsequent call to 
+	 * Tests if there are more tokens available from this tokenizer's string. If
+	 * this method returns <tt>true</tt>, then a subsequent call to
 	 * <tt>nextToken</tt> with no argument will successfully return a token.
-	 *
-	 * @return  <code>true</code> if and only if there is at least one token 
-	 *          in the string after the current position; <code>false</code> 
-	 *          otherwise.
+	 * 
+	 * @return <code>true</code> if and only if there is at least one token in
+	 *         the string after the current position; <code>false</code>
+	 *         otherwise.
 	 */
 	public boolean hasMoreTokens() {
-		return (this.currentIndex < this.record.length());
+		if (this.is_comment_line)
+			return false;
+		return (this.currentIndex >= 0);
 	}
 
 	/**
 	 * Returns the next token from this string tokenizer.
-	 *
-	 * @return     the next token from this string tokenizer.
-	 * @exception  NoSuchElementException  if there are no more tokens in this
-	 *               tokenizer's string.
-	 * @exception  IllegalArgumentException if given parameter string format was wrong  
+	 * 
+	 * @return the next token from this string tokenizer.
+	 * @exception NoSuchElementException
+	 *                if there are no more tokens in this tokenizer's string.
+	 * @exception IllegalArgumentException
+	 *                if given parameter string format was wrong
 	 */
-	public String nextToken()
-		throws NoSuchElementException, IllegalArgumentException {
+	public String nextToken() throws NoSuchElementException,
+	IllegalArgumentException {
 		String token = null;
 		int start;
 		int end;
-
 		if (!this.hasMoreTokens()) {
 			throw new NoSuchElementException();
-		} else {
-			if (this.record.startsWith(this.quate, this.currentIndex)) {
-				String rec = this.record.substring(this.currentIndex + this.quate.length());
-				token = "";
-				for (;;) {
-					end = rec.indexOf(this.quate);
-					if (end < 0) {
-						throw new IllegalArgumentException("Illegal format");
-					}
-					if (!rec.startsWith(this.quate, end + 1)) {
-						token = token + rec.substring(0, end);
-						break;
-					}
-					token = token + rec.substring(0, end + 1);
-					rec = rec.substring(end + this.quate.length() * 2);
-					this.currentIndex++;
+		}
+		if (this.record.startsWith(SGCSVTokenizer.DOUBLE_QUATE,
+				this.currentIndex)) {
+			String rec = this.record.substring(this.currentIndex
+					+ SGCSVTokenizer.DOUBLE_QUATE_LEN);
+			token = ""; //$NON-NLS-1$
+			for (;;) {
+				end = rec.indexOf(SGCSVTokenizer.DOUBLE_QUATE);
+				if (end < 0) {
+					throw new IllegalArgumentException("Illegal format"); //$NON-NLS-1$
 				}
-				this.currentIndex += (token.length() + this.quate.length() * 2 + this.separator.length());
-			} else if (
-				(end = this.record.indexOf(this.separator, this.currentIndex))
-					>= 0) {
-				start = this.currentIndex;
+				if (!rec.startsWith(SGCSVTokenizer.DOUBLE_QUATE, end + 1)) {
+					token = token + rec.substring(0, end);
+					break;
+				}
+				token = token + rec.substring(0, end + 1);
+				rec = rec.substring(end + SGCSVTokenizer.DOUBLE_QUATE_LEN * 2);
+				this.currentIndex++;
+			}
+			// don't trim string
+			this.currentIndex += (token.length()
+					+ SGCSVTokenizer.DOUBLE_QUATE_LEN * 2 + SGCSVTokenizer.SEPARATOR_LEN);
+			if (!this.is_csv_mode) {
+				this.currentIndex = nextTokenIndexOf(this.currentIndex);
+			}
+			if (this.currentIndex >= this.record.length())
+				this.currentIndex = -1;
+		} else {
+			start = this.currentIndex;
+			if (this.is_csv_mode)
+				end = this.record.indexOf(SEPARATOR_COMMA, this.currentIndex);
+			else
+				end = nextSeparatorIndexOf(this.currentIndex);
+			if (end >= 0) {
 				token = this.record.substring(start, end);
-				this.currentIndex = end + separator.length();
+				if (this.is_csv_mode) {
+					this.currentIndex = end + SEPARATOR_LEN;
+				} else {
+					this.currentIndex = nextTokenIndexOf(end);
+					if (this.currentIndex == this.record.length())
+						this.currentIndex = -1;
+				}
 			} else {
-				start = this.currentIndex;
-				token = this.record.substring(start);
-				this.currentIndex = this.record.length();
+				// end of line reached
+				if (this.currentIndex == this.record.length())
+					token = ""; //$NON-NLS-1$
+				else
+					token = this.record.substring(start);
+				this.currentIndex = -1;
 			}
+			token = token.trim();
 		}
-
 		return token;
 	}
 
-	/**
-	 * Returns the next token in this string tokenizer's string. First, 
-	 * the set of characters considered to be separator by this 
-	 * <tt>CSVTokenizer</tt> object is changed to be the characters in 
-	 * the string <tt>separator</tt>. Then the next token in the string
-	 * after the current position is returned. The current position is 
-	 * advanced beyond the recognized token.  The new delimiter set 
-	 * remains the default after this call. 
-	 *
-	 * @param      theSeparator   the new separator.
-	 * @return     the next token, after switching to the new delimiter set.
-	 * @exception  NoSuchElementException  if there are no more tokens in this
-	 *               tokenizer's string.
-	 */
-	public String nextToken(String theSeparator) {
-		separator = theSeparator;
-		return nextToken();
+	private int nextSeparatorIndexOf(int fromIndex) {
+		char c;
+		int cnt = 0;
+		int ii;
+		int len = this.record.length();
+		if (len == fromIndex)
+			return -1;
+		for (ii = fromIndex; ii < len; ii++) {
+			c = this.record.charAt(ii);
+			if (this.mWhiteSpaceList.contains(new Character(c)))
+				break;
+			cnt++;
+		}
+		if (ii == len)
+			return -1;
+		return cnt + fromIndex;
+	}
+
+	private int nextTokenIndexOf(int fromIndex) {
+		char c;
+		int cnt = 0;
+		int len = this.record.length();
+		for (int ii = fromIndex; ii < len; ii++) {
+			c = this.record.charAt(ii);
+			if (!this.mWhiteSpaceList.contains(new Character(c))) {
+				break;
+			}
+			cnt++;
+		}
+		return cnt + fromIndex;
 	}
 
 	/**
-	 * Returns the same value as the <code>hasMoreTokens</code>
-	 * method. It exists so that this class can implement the
-	 * <code>Enumeration</code> interface. 
-	 *
-	 * @return  <code>true</code> if there are more tokens;
-	 *          <code>false</code> otherwise.
-	 * @see     java.util.Enumeration
-	 * @see     java.util.CSVTokenizer#hasMoreTokens()
+	 * Returns the same value as the <code>hasMoreTokens</code> method. It
+	 * exists so that this class can implement the <code>Enumeration</code>
+	 * interface.
+	 * 
+	 * @return <code>true</code> if there are more tokens; <code>false</code>
+	 *         otherwise.
+	 * @see java.util.Enumeration
+	 * @see #hasMoreTokens()
 	 */
 	public boolean hasMoreElements() {
 		return hasMoreTokens();
 	}
 
 	/**
-	 * Returns the same value as the <code>nextToken</code> method,
-	 * except that its declared return value is <code>Object</code> rather than
+	 * Returns the same value as the <code>nextToken</code> method, except
+	 * that its declared return value is <code>Object</code> rather than
 	 * <code>String</code>. It exists so that this class can implement the
-	 * <code>Enumeration</code> interface. 
-	 *
-	 * @return     the next token in the string.
-	 * @exception  NoSuchElementException  if there are no more tokens in this
-	 *               tokenizer's string.
-	 * @see        java.util.Enumeration
-	 * @see        java.util.CSVTokenizer#nextToken()
+	 * <code>Enumeration</code> interface.
+	 * 
+	 * @return the next token in the string.
+	 * @exception NoSuchElementException
+	 *                if there are no more tokens in this tokenizer's string.
+	 * @see java.util.Enumeration
+	 * @see #nextToken()
 	 */
 	public Object nextElement() {
 		return nextToken();
 	}
 
-	/**
-	 * Calculates the number of times that this tokenizer's 
-	 * <code>nextToken</code> method can be called before it generates an 
-	 * exception. The current position is not advanced.
-	 *
-	 * @return  the number of tokens remaining in the string using the current
-	 *          delimiter set.
-	 * @see     java.util.CSVTokenizer#nextToken()
-	 */
-	public int countTokens() {
-		int count = 0;
-		
-		// <TODO>
-		int preserve = this.currentIndex;
-		while (this.hasMoreTokens()) {
-			this.nextToken();
-			count++;
-		}
-		this.currentIndex = preserve;
-		// </TODO>
-		
-		return count;
-	}
+	// public static void main(String[] args) {
+	// int i = 1;
+	// String str;
+	// String expect;
+	// String result;
+
+	// str = "1, \t 2, \"\" 3, 4, \"a, \"\"\\hoge\"";
+	// str = "1, \t 2, \"\" 3, b 4, a hoge";
+	// System.out.println("String : [" + str + "]");
+	// SGCSVTokenizer csvt = new SGCSVTokenizer(str, true);
+	// i = 1;
+	// while (csvt.hasMoreTokens()) {
+	// try {
+	// expect = String.valueOf(i++);
+	// result = csvt.nextToken();
+	// System.out.print(expect + ": [");
+	// System.out.println(result + "]");
+	// } catch (NoSuchElementException e) {
+	// e.printStackTrace();
+	//	System.exit(-1);
+	//	}
+	//	}
 
-	/**
-	 * Returns the quate.
-	 * @return char
-	 */
-	public String getQuate() {
-		return this.quate;
-	}
+	//	}
 
-	/**
-	 * Sets the quate.
-	 * @param quate The quate to set
-	 */
-	public void setQuate(String quate) {
-		this.quate = quate;
-	}
 }
