/*
 * Copyright 2013 Yuichiro Moriguchi
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.morilib.awk.parser;

import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

/**
 * Hand-written lexer that turns AWK program text into {@link AwkToken}s.
 *
 * <p>An instance always holds one current token ({@link #getToken()});
 * {@link #nextToken()} advances to the following one.  Before every scan
 * the lexer peeks at the raw character that directly follows the previous
 * token (whitespace is <em>not</em> skipped for this peek) and remembers it,
 * so that {@link #isEos()} and {@link #eatEos()} can recognize a newline
 * as a statement terminator even though the scanner itself treats
 * newlines as ordinary whitespace.
 *
 * <p>The static methods {@link #getToken(PushbackReader)} and
 * {@link #getPattern(PushbackReader)} expose the stateless scanning
 * routines; they never need more than one character of pushback.
 */
public final class AwkLexer {

	/**
	 * Internal signal raised by {@link #skipws(PushbackReader)} when the
	 * input ends before another token starts.
	 * {@link #getToken(PushbackReader)} converts it into
	 * {@link AwkReserved#ENDMARKER}.
	 */
	private static class EndException extends RuntimeException {
		private static final long serialVersionUID = 1L;
	}

	/**
	 * Scanner states of {@link #_getToken(PushbackReader)}.  Operators need
	 * no state of their own; they are resolved by
	 * {@link #scanOperator(PushbackReader, int)}.
	 */
	private enum St1 {
		INIT, KEYWORD,
		STRING, STRING_ESC,
		DOT,
		ZERO, NUMBER, NUMBER_OCT, NUMBER_HEX,
		FLOAT1, FLOAT2, FLOAT_E1, FLOAT_E2, FLOAT_E3,
	}

	/** Marks "no second alternative"; never equal to a character actually read. */
	private static final int NO_CHAR = -1;

	/** Tokens that consist of exactly one character, keyed by that character. */
	private static final Map<Integer, AwkToken> OP1;

	/** Reserved words, keyed by their spelling. */
	private static final Map<String, AwkToken> RES1;

	static {
		Map<Integer, AwkToken> o = new HashMap<Integer, AwkToken>();
		Map<String, AwkToken>  r = new HashMap<String, AwkToken>();

		o.put((int)'?', AwkOperator.TRI1);
		o.put((int)':', AwkOperator.TRI2);
		o.put((int)'~', AwkOperator.MATCH);
		o.put((int)'(', AwkOperator.LPAREN);
		o.put((int)')', AwkOperator.RPAREN);
		o.put((int)'{', AwkReserved.BLOCK_B);
		o.put((int)'}', AwkReserved.BLOCK_E);
		o.put((int)',', AwkOperator.COMMA);
		o.put((int)'$', AwkOperator.FIELD);
		o.put((int)';', AwkReserved.SEMICL);
		// skipws() consumes '\n' as whitespace, so the scanner never looks
		// this entry up itself; newlines are detected through headchr.
		o.put((int)'\n', AwkReserved.EOL);
		o.put((int)'[', AwkOperator.LBRAKET);
		o.put((int)']', AwkOperator.RBRAKET);
		o.put((int)'@', AwkOperator.REFFN);
		OP1 = Collections.unmodifiableMap(o);

		r.put("if", AwkReserved.IF);
		r.put("else", AwkReserved.ELSE);
		r.put("for", AwkReserved.FOR);
		r.put("in", AwkReserved.IN);
		r.put("while", AwkReserved.WHILE);
		r.put("do", AwkReserved.DO);
		r.put("break", AwkReserved.BREAK);
		r.put("continue", AwkReserved.CONT);
		r.put("next", AwkReserved.NEXT);
		r.put("exit", AwkReserved.EXIT);
		r.put("delete", AwkReserved.DELETE);
		r.put("function", AwkReserved.FUNC);

		r.put("print", AwkReserved.PRINT);
		r.put("printf", AwkReserved.PRINTF);
		r.put("getline", AwkReserved.GETLINE);
		RES1 = Collections.unmodifiableMap(r);
	}

	/** The current token. */
	private AwkToken token;

	/** Source of characters; wrapped so one character can be pushed back. */
	private final PushbackReader reader;

	/**
	 * The raw character that directly followed the previous token, peeked
	 * before whitespace was skipped; negative at end of input.
	 */
	private int headchr = 0;

	/**
	 * Creates a lexer over the given reader and scans the first token.
	 *
	 * @param rd the source of the AWK program text
	 * @throws IOException if reading from {@code rd} fails
	 */
	public AwkLexer(Reader rd) throws IOException {
		reader = new PushbackReader(rd);
		advance();
	}

	/**
	 * Peeks at the next raw character (stored in {@link #headchr}) and then
	 * scans the next token into {@link #token}.
	 */
	private void advance() throws IOException {
		headchr = reader.read();
		if(headchr >= 0)  reader.unread(headchr);
		token = getToken(reader);
	}

	/**
	 * Returns the current token without consuming it.
	 *
	 * @return the current token
	 */
	public AwkToken getToken() {
		return token;
	}

	/**
	 * Advances to the next token.  Once the end marker has been reached the
	 * lexer stays there and no further input is read.
	 *
	 * @return the new current token
	 * @throws IOException if reading fails
	 */
	public AwkToken nextToken() throws IOException {
		if(!token.equals(AwkReserved.ENDMARKER)) {
			advance();
		}
		return token;
	}

	/**
	 * Tests whether the lexer is positioned at the end of a statement.
	 *
	 * @return {@code true} if the current token is {@code ;}, the end
	 *         marker, {@code in} or {@code >>}, or if the character
	 *         directly after the previous token was a newline
	 * @throws IOException declared for interface compatibility
	 */
	public boolean isEos() throws IOException {
		return (token.equals(AwkReserved.SEMICL) ||
				token.equals(AwkReserved.ENDMARKER) ||
				token.equals(AwkReserved.IN) ||
				token.equals(AwkReserved.APNDOUT) ||
				headchr == '\n');
	}

	/**
	 * Consumes a statement terminator if there is one.  A {@code ;} is
	 * consumed; the end marker and a preceding newline are only reported.
	 *
	 * @return {@code true} if a statement end was found
	 * @throws IOException if reading fails
	 */
	public boolean eatEos() throws IOException {
		if(token.equals(AwkReserved.SEMICL)) {
			nextToken();
			return true;
		}
		return token.equals(AwkReserved.ENDMARKER) || headchr == '\n';
	}

	/**
	 * Requires the current token to equal {@code t} and advances past it.
	 *
	 * @param t the expected token
	 * @return the token following {@code t}
	 * @throws AwkSyntaxException if the current token is not {@code t}
	 * @throws IOException if reading fails
	 */
	public AwkToken eatToken(AwkToken t) throws IOException {
		if(!token.equals(t)) {
			throw new AwkSyntaxException(t, token);
		}
		return nextToken();
	}

	/**
	 * Advances past the current token only if it equals {@code t}.
	 *
	 * @param t the optional token
	 * @return the current token after the (possible) advance
	 * @throws IOException if reading fails
	 */
	public AwkToken eatTokenOpt(AwkToken t) throws IOException {
		return token.equals(t) ? nextToken() : token;
	}

	/**
	 * Reads a regular expression body from this lexer's input; see
	 * {@link #getPattern(PushbackReader)}.
	 *
	 * @return the pattern text without the closing slash
	 * @throws IOException if reading fails
	 */
	public String getPattern() throws IOException {
		return getPattern(reader);
	}

	/** Reads one character; end of input is a lexical error here. */
	private static int rde(PushbackReader rd) throws IOException {
		int c = rd.read();

		if(c < 0) {
			throw new AwkLexerException();
		}
		return c;
	}

	/**
	 * Skips whitespace (newlines included) and returns the first other
	 * character; raises {@link EndException} when the input ends first.
	 */
	private static int skipws(PushbackReader rd) throws IOException {
		int c;

		do {
			c = rd.read();
		} while(c >= 0 && Character.isWhitespace(c));
		if(c < 0)  throw new EndException();
		return c;
	}

	/** Maps a word to its reserved-word token, or to a symbol otherwise. */
	private static AwkToken getr(String s) {
		AwkToken reserved = RES1.get(s);

		return reserved != null ? reserved : AwkSymbol.getInstance(s);
	}

	/** Tests for an ASCII decimal digit. */
	private static boolean isnum(int c) {
		return c >= '0' && c <= '9';
	}

	/** Letters, letter-numbers, '_' and non-ASCII digits may start an identifier. */
	private static boolean isAwkIdentifierStart(int c) {
		return (Character.isLetter(c) ||
				(Character.isDigit(c) && !isnum(c)) ||
				Character.getType(c) == Character.LETTER_NUMBER ||
				c == '_');
	}

	/** Letters, any digits, letter-numbers and '_' may continue an identifier. */
	private static boolean isAwkIdentifierPart(int c) {
		return (Character.isLetter(c) ||
				Character.isDigit(c)  ||
				Character.getType(c) == Character.LETTER_NUMBER ||
				c == '_');
	}

	/** Reads one octal digit and returns its value. */
	private static int getoct1(PushbackReader rd) throws IOException {
		int c = rde(rd);

		if(c < '0' || c > '7') {
			throw new AwkLexerException();
		}
		return c - '0';
	}

	/** Reads exactly three octal digits and returns their value. */
	private static int getoct(PushbackReader rd) throws IOException {
		return getoct1(rd) * 64 + getoct1(rd) * 8 + getoct1(rd);
	}

	/** Reads one hexadecimal digit (either case) and returns its value. */
	private static int gethex1(PushbackReader rd) throws IOException {
		int c = rde(rd);

		if(c >= '0' && c <= '9') {
			return c - '0';
		} else if(c >= 'a' && c <= 'f') {
			return c - 'a' + 10;
		} else if(c >= 'A' && c <= 'F') {
			return c - 'A' + 10;
		} else {
			throw new AwkLexerException();
		}
	}

	/** Reads exactly four hexadecimal digits and returns their value. */
	private static int gethex(PushbackReader rd) throws IOException {
		return (gethex1(rd) * 0x1000 + gethex1(rd) * 0x100 +
				gethex1(rd) * 0x10   + gethex1(rd));
	}

	/**
	 * Resolves an operator with up to two two-character forms.  Reads one
	 * character of lookahead: {@code ch1} selects {@code tok1}, {@code ch2}
	 * selects {@code tok2}; anything else is pushed back and {@code single}
	 * is returned (also at end of input).
	 */
	private static AwkToken follow(PushbackReader rd, AwkToken single,
			int ch1, AwkToken tok1,
			int ch2, AwkToken tok2) throws IOException {
		int c = rd.read();

		if(c < 0) {
			return single;
		} else if(c == ch1) {
			return tok1;
		} else if(c == ch2) {
			return tok2;
		} else {
			rd.unread(c);
			return single;
		}
	}

	/** Same as the six-argument form, with only one two-character alternative. */
	private static AwkToken follow(PushbackReader rd, AwkToken single,
			int ch1, AwkToken tok1) throws IOException {
		return follow(rd, single, ch1, tok1, NO_CHAR, null);
	}

	/**
	 * Resolves an operator that exists only in doubled form
	 * ({@code &&}, {@code ||}); the single character is a lexical error.
	 */
	private static AwkToken doubled(PushbackReader rd, int ch,
			AwkToken tok) throws IOException {
		if(rd.read() != ch) {
			throw new AwkLexerException();
		}
		return tok;
	}

	/**
	 * Scans the operator or punctuation token that starts with {@code c}.
	 *
	 * @return the token, or {@code null} if {@code c} starts no operator
	 */
	private static AwkToken scanOperator(PushbackReader rd,
			int c) throws IOException {
		switch(c) {
		case '<':
			return follow(rd, AwkRelop.LT, '=', AwkRelop.LE);
		case '>':
			return follow(rd, AwkRelop.GT,
					'=', AwkRelop.GE, '>', AwkReserved.APNDOUT);
		case '=':
			return follow(rd, AwkAssignop.ASSIGN, '=', AwkRelop.EQ);
		case '!':
			return follow(rd, AwkOperator.L_NOT,
					'=', AwkRelop.NE, '~', AwkOperator.NMATCH);
		case '&':
			return doubled(rd, '&', AwkOperator.L_AND);
		case '|':
			return doubled(rd, '|', AwkOperator.L_OR);
		case '+':
			return follow(rd, AwkOperator.ADD,
					'+', AwkOperator.INC, '=', AwkAssignop.A_ADD);
		case '-':
			return follow(rd, AwkOperator.SUB,
					'-', AwkOperator.DEC, '=', AwkAssignop.A_SUB);
		case '*':
			return follow(rd, AwkOperator.MUL, '=', AwkAssignop.A_MUL);
		case '/':
			return follow(rd, AwkOperator.DIV, '=', AwkAssignop.A_DIV);
		case '%':
			return follow(rd, AwkOperator.MOD, '=', AwkAssignop.A_MOD);
		case '^':
			return follow(rd, AwkOperator.POW, '=', AwkAssignop.A_POW);
		default:
			return OP1.get(c);
		}
	}

	/**
	 * Scans one token.
	 *
	 * @param rd the input; at most one character is ever pushed back
	 * @return the scanned token
	 * @throws AwkLexerException on malformed input, e.g. an unterminated
	 *         string, a lone {@code &} or {@code |}, or an incomplete
	 *         floating point literal
	 * @throws IOException if reading fails
	 */
	static AwkToken _getToken(
			PushbackReader rd) throws IOException {
		StringBuilder b1 = new StringBuilder();
		St1 stat = St1.INIT;
		int c;

		while(true) {
			switch(stat) {
			case INIT: {
				c = skipws(rd);
				AwkToken op = scanOperator(rd, c);

				if(op != null) {
					return op;
				} else if(c == '\"') {
					stat = St1.STRING;
				} else if(c == '0') {
					stat = St1.ZERO;
				} else if(isnum(c)) {
					b1.append((char)c);
					stat = St1.NUMBER;
				} else if(c == '.') {
					b1.append('.');
					stat = St1.DOT;
				} else if(isAwkIdentifierStart(c)) {
					b1.appendCodePoint(c);
					stat = St1.KEYWORD;
				}
				// NOTE(review): a character that starts no token is
				// silently skipped here -- confirm this is intended.
				break;
			}
			case DOT:
				// The buffer already holds the '.' itself.
				if((c = rd.read()) < 0) {
					return AwkOperator.NAME;
				} else if(isnum(c)) {
					b1.append((char)c);
					stat = St1.FLOAT2;
				} else {
					rd.unread(c);
					return AwkOperator.NAME;
				}
				break;
			case KEYWORD:
				if((c = rd.read()) < 0) {
					return getr(b1.toString());
				} else if(!isAwkIdentifierPart(c)) {
					rd.unread(c);
					return getr(b1.toString());
				} else {
					b1.appendCodePoint(c);
				}
				break;
			case STRING:
				if((c = rde(rd)) == '\\') {
					stat = St1.STRING_ESC;
				} else if(c == '\"') {
					return AwkStringToken.getInstance(b1.toString());
				} else {
					b1.appendCodePoint(c);
				}
				break;
			case STRING_ESC:
				c = rde(rd);
				switch(c) {
				case 'n':  b1.append('\n');  break;
				case 'r':  b1.append('\r');  break;
				case 't':  b1.append('\t');  break;
				case 'b':  b1.append('\b');  break;
				case 'f':  b1.append('\f');  break;
				case 'a':  b1.append((char)0x07);  break;  // alert
				case 'v':  b1.append((char)0x0B);  break;  // vertical tab
				case 'u':
					// exactly four hexadecimal digits
					b1.append((char)gethex(rd));
					break;
				default:
					if(isnum(c)) {
						// exactly three octal digits, first one included
						rd.unread(c);
						b1.append((char)getoct(rd));
					} else {
						// \" \\ \/ and unknown escapes: the character itself
						b1.appendCodePoint(c);
					}
					break;
				}
				stat = St1.STRING;
				break;
			case ZERO:
				// The leading '0' has been read but is not in the buffer.
				if((c = rd.read()) < 0) {
					return new AwkIntegerToken("0", 10);
				} else if(c == 'x') {
					// NOTE(review): "0x" without digits hands an empty
					// string to AwkIntegerToken -- confirm how it reacts.
					stat = St1.NUMBER_HEX;
				} else if(isnum(c)) {
					// NOTE(review): 8 and 9 are accepted into an octal
					// literal here -- confirm AwkIntegerToken rejects them.
					b1.append((char)c);
					stat = St1.NUMBER_OCT;
				} else if(c == '.') {
					// "0.5" is one float, exactly like "1.5"
					b1.append("0.");
					stat = St1.FLOAT1;
				} else {
					rd.unread(c);
					return new AwkIntegerToken("0", 10);
				}
				break;
			case NUMBER:
				if((c = rd.read()) < 0) {
					return new AwkIntegerToken(b1.toString(), 10);
				} else if(isnum(c)) {
					b1.append((char)c);
				} else if(c == '.') {
					b1.append((char)c);
					stat = St1.FLOAT1;
				} else if(c == 'e' || c == 'E') {
					b1.append((char)c);
					stat = St1.FLOAT_E1;
				} else {
					rd.unread(c);
					return new AwkIntegerToken(b1.toString(), 10);
				}
				break;
			case NUMBER_OCT:
				if((c = rd.read()) < 0) {
					return new AwkIntegerToken(b1.toString(), 8);
				} else if(isnum(c)) {
					b1.append((char)c);
				} else {
					rd.unread(c);
					return new AwkIntegerToken(b1.toString(), 8);
				}
				break;
			case NUMBER_HEX:
				if((c = rd.read()) < 0) {
					return new AwkIntegerToken(b1.toString(), 16);
				} else if(isnum(c) ||
						(c >= 'a' && c <= 'f') ||
						(c >= 'A' && c <= 'F')) {
					b1.append((char)c);
				} else {
					rd.unread(c);
					return new AwkIntegerToken(b1.toString(), 16);
				}
				break;
			case FLOAT1:
				// directly after the decimal point: a digit is mandatory
				if(isnum(c = rde(rd))) {
					b1.append((char)c);
					stat = St1.FLOAT2;
				} else {
					throw new AwkLexerException();
				}
				break;
			case FLOAT2:
				// fraction digits, optionally followed by an exponent
				if((c = rd.read()) < 0) {
					return new AwkFloatToken(b1.toString());
				} else if(isnum(c)) {
					b1.append((char)c);
				} else if(c == 'e' || c == 'E') {
					b1.append((char)c);
					stat = St1.FLOAT_E1;
				} else {
					rd.unread(c);
					return new AwkFloatToken(b1.toString());
				}
				break;
			case FLOAT_E1:
				// directly after 'e': a sign or the first exponent digit
				if(isnum(c = rde(rd))) {
					b1.append((char)c);
					stat = St1.FLOAT_E3;
				} else if(c == '+' || c == '-') {
					b1.append((char)c);
					stat = St1.FLOAT_E2;
				} else {
					throw new AwkLexerException();
				}
				break;
			case FLOAT_E2:
				// after the exponent sign: a digit is mandatory
				if(isnum(c = rde(rd))) {
					b1.append((char)c);
					stat = St1.FLOAT_E3;
				} else {
					throw new AwkLexerException();
				}
				break;
			case FLOAT_E3:
				// remaining exponent digits
				if((c = rd.read()) < 0) {
					return new AwkFloatToken(b1.toString());
				} else if(isnum(c)) {
					b1.append((char)c);
				} else {
					rd.unread(c);
					return new AwkFloatToken(b1.toString());
				}
				break;
			}
		}
	}

	/**
	 * Scans one token, reporting the end of input as a token.
	 *
	 * @param rd the input; at most one character is ever pushed back
	 * @return the scanned token, or {@link AwkReserved#ENDMARKER} if only
	 *         whitespace remained
	 * @throws AwkLexerException on malformed input
	 * @throws IOException if reading fails
	 */
	public static AwkToken getToken(
			PushbackReader rd) throws IOException {
		try {
			return _getToken(rd);
		} catch(EndException e) {
			// not an error: skipws() ran out of input between tokens
			return AwkReserved.ENDMARKER;
		}
	}

	/**
	 * Reads the body of a regular expression literal up to, and including,
	 * the closing unescaped {@code /}.  The opening slash must already have
	 * been consumed.
	 *
	 * <p>{@code \/} yields a plain {@code /}; every other backslash escape
	 * is copied unchanged (backslash plus character) so that it reaches the
	 * regular expression engine intact.
	 *
	 * @param rd the input, positioned just after the opening slash
	 * @return the pattern text without the closing slash
	 * @throws AwkLexerException if the input ends before the closing slash
	 * @throws IOException if reading fails
	 */
	public static String getPattern(
			PushbackReader rd) throws IOException {
		StringBuilder b = new StringBuilder();
		boolean esc = false;
		int c;

		while(true) {
			if((c = rd.read()) < 0) {
				throw new AwkLexerException();
			} else if(esc) {
				// The introducing backslash was held back until now so
				// that it is emitted exactly once, and not at all for "\/".
				if(c != '/')  b.append('\\');
				b.append((char)c);
				esc = false;
			} else if(c == '/') {
				return b.toString();
			} else if(c == '\\') {
				esc = true;
			} else {
				b.append((char)c);
			}
		}
	}

}
