/*
 * Programming Language SOOPY
 *   (Simple Object Oriented Programming sYstem)
 * 
 * Copyright (C) 2002,2003 SUZUKI Jun
 * 
 * URL: http://sourceforge.jp/projects/soopy/
 * License: GPL(GNU General Public License)
 * 
 * 
 * $Id: Lexer.cpp,v 1.31 2004/07/04 12:32:36 randy Exp $
 */

#include "soopy.h"
//#include "ytab.h"
#include "parser.tab.hpp"


// Definition of the static member Lexer::lexer; it starts out as NULL.
// NOTE(review): presumably this is the instance handed out by
// Lexer::getLexer() (used by yylex() at the end of this file) -- confirm
// against the class declaration.
Lexer* Lexer::lexer = NULL;

// Creates a lexer that takes its characters from `r` and fills the
// reserved-word table.
//
// Reserved words registered here:
//       true, false, nil
//       fun, property, const
//       match, loop, exit, next
//       retry, let, private, public
//       or, and, array, datatype
//
// Every key is a heap-allocated SpString; the destructor deletes them.
Lexer::Lexer(ReadEncoder& r)
     : reader(&r)
{
    // spelling -> token code, registered in the order listed
    static const struct {
        const char* word;
        int         token;
    } keywords[] = {
        { "true",     TokTRUE  },
        { "false",    TokFALSE },
        { "nil",      NIL      },
        { "fun",      FUN      },
        { "property", PROPERTY },
        { "const",    CONSTANT },
        // { "if",    IF       },   (disabled)
        { "match",    MATCH    },
        { "loop",     LOOP     },
        { "exit",     EXIT     },
        { "next",     NEXT     },
        { "retry",    RETRY    },
        { "let",      LET      },
        { "private",  PRIVATE  },
        { "public",   PUBLIC   },
        { "or",       OR       },
        { "and",      AND      },
        { "array",    ARRAY    },
        { "datatype", DATATYPE }
    };
    const unsigned int count = sizeof(keywords) / sizeof(keywords[0]);

    // no ".." token is pending yet (see lexReal() / lex())
    wdot_read = false;

    for(unsigned int k = 0; k < count; k++){
        reserved[new SpString(keywords[k].word)] = keywords[k].token;
    }
}

// Releases the SpString keys allocated by the constructor for the
// reserved-word table.
//
// The static Lexer::lexer pointer is NOT deleted here.  Deleting it from
// inside the destructor re-enters ~Lexer() on that object while the
// pointer is still non-NULL, which recurses without end and deletes the
// same object (and its keys) repeatedly.  Instead, the pointer is merely
// cleared when it refers to the object being destroyed, so it never
// dangles.
Lexer::~Lexer()
{
    for(KeywordMap::iterator it = reserved.begin(); it != reserved.end(); ++it){
        delete it->first;
    }
    if(lexer == this){
        lexer = NULL;
    }
}

// Consumes characters until one is found for which isWhiteSpace() is
// false, and returns that character.  lineno is bumped once for every
// character read that isEOL() accepts.
SpChar Lexer::skipWhiteSpace()
{
    SpChar ch;
    do{
        ch = reader->ReadAsLowerChar();
        if(isEOL(ch)){
            lineno++;
        }
    }while(isWhiteSpace(ch));
    return ch;
}

void Lexer::skipOneLine()
{
    SpChar c;
    while(true){
        c = reader->ReadAsLowerChar();
        if(isEOL(c)){
            lineno++;
            break;
        }
    }
}

void Lexer::skipComment()
{
    SpChar c;
    unsigned int depth = 1;
    while(true){
        c = reader->ReadAsLowerChar();
        if(isEOL(c)){
            lineno++;
        }else if(c == JIS('*')){
            c = reader->ReadAsLowerChar();
            if(c == JIS('/')){
                depth--;
                if(depth == 0){
                    break;
                }
            }
        }else if(c == JIS('/')){
            c = reader->ReadAsLowerChar();
            if(c == JIS('*')){
                depth++;
            }
        }
    }
}

// True when c lies in the range JIS('0') .. JIS('1').
inline bool isBinDIGIT(SpChar c)
{
    return (JIS('0') <= c) && (c <= JIS('1'));
}

// True when c lies in the range JIS('0') .. JIS('7').
inline bool isOctDIGIT(SpChar c)
{
    return (JIS('0') <= c) && (c <= JIS('7'));
}

// True when c lies in JIS('0') .. JIS('9') or JIS('a') .. JIS('f').
// Upper-case letters are not accepted here.
inline bool isHexDIGIT(SpChar c)
{
    const bool decimal = (JIS('0') <= c) && (c <= JIS('9'));
    const bool letter  = (JIS('a') <= c) && (c <= JIS('f'));
    return decimal || letter;
}

int Lexer::lexBIN()
{
    SpInt i = 0;
    SpChar c;

    while(true){
        c = reader->ReadAsLowerChar();
        if(!isBinDIGIT(c)){
            reader->UnreadAsLowerChar(c);
            break;
        }
        i = (i * 2) + toINT(c);
    }
    yylval.setInt(i);
    return INT;
}

int Lexer::lexOCT()
{
    SpInt i = 0;
    SpChar c;

    while(true){
        c = reader->ReadAsLowerChar();
        if(!isOctDIGIT(c)){
            reader->UnreadAsLowerChar(c);
            break;
        }
        i = (i * 8) + toINT(c);
    }
    yylval.setInt(i);
    return INT;
}

int Lexer::lexHEX()
{
    SpInt i = 0;
    SpChar c;

    while(true){
        c = reader->ReadAsLowerChar();
        if(!isHexDIGIT(c)){
            reader->UnreadAsLowerChar(c);
            break;
        }
        if(isDIGIT(c)){
            i = (i * 16) + toINT(c);
        }else{
            i = (i * 16) + (c - JIS('a') + 10);
        }
    }
    yylval.setInt(i);
    return INT;
}

int Lexer::lexZeroNumber()
{
    SpChar c = reader->ReadAsLowerChar();
    switch(c){
      case JIS('.'): // Real or '..'
        integer = 0;
        //        buf[0] = MakeSpChar(CodeJIS, '0');
        //        buf[1] = MakeSpChar(CodeJIS, '.');
        buf[0] = '0';
        buf[1] = '.';
        index = 2;
        return lexReal();
      case JIS('x'): // HEX. ex. 0x3e, 0xff
        return lexHEX();
      case JIS('b'): // BIN. ex. 0b1001 0b01011100
        return lexBIN();
      default:
        if(isOctDIGIT(c)){ // OCT. ex. 0712
            reader->UnreadAsLowerChar(c);
            return lexOCT();
        }else{
            //throw SpException("illegal number format (bin, oct, hex)");
            reader->UnreadAsLowerChar(c);
            yylval.setInt(0);
            return INT;
        }
    }
}

int Lexer::lexNumber()
{
//    bool isInteger = true;
    SpChar c;

    integer = 0;
    index = 0;
    while(true){
        if(index >= MaxBuf){
            throw SpException("too long num format");
        }
        c = reader->ReadAsLowerChar();
        if(!isDIGIT(c)){
            if(c == JIS('.')){
//                isInteger = false;
                //buf[index] = toHankakuLower(c);
                buf[index] = c;
                index++;
                return lexReal();
//                break;
            }
            reader->UnreadAsLowerChar(c);
            break;
        }
        integer = (integer * 10) + toINT(c);
        buf[index] = toHankakuLower(c);
        index++;
    }
//    if(isInteger){
        yylval.setInt(integer);
        return INT;
//    }
}

// Continues lexing a number after its integer part and a '.' have been
// consumed.  On entry `buf[0 .. index-1]` holds the text read so far
// (ending in '.') and `integer` holds the value of the integer part.
//
// - If the next character is another '.', the input was "<int>..": the
//   integer is stored in yylval, wdot_read is set so that the next lex()
//   call returns WDOT, and INT is returned.
// - Otherwise fraction digits and an optional exponent ('e', an optional
//   sign, digits) are appended to `buf`, which is converted with atof();
//   the value is stored in yylval and REAL is returned.
//
// Throws SpException("too long num format") when `index` reaches MaxBuf
// and SpException("illegal num format") when 'e' is not followed by a
// sign or a digit.
int Lexer::lexReal()
{
    SpChar c;
    bool hasExponent = false;

    // check '..'
    c = reader->ReadAsLowerChar();
    if(c == JIS('.')){
        yylval.setInt(integer);
        wdot_read = true;
        return INT;
    }
    reader->UnreadAsLowerChar(c);

    // fraction digits, up to an optional 'e'
    while(true){
        if(index >= MaxBuf){
            throw SpException("too long num format");
        }
        c = reader->ReadAsLowerChar();
        if(!isDIGIT(c)){
            if((c == JIS('e')) || (c == JIS('E'))){
                hasExponent = true;
                buf[index] = toHankakuLower(c);
                index++;
            }else{
                reader->UnreadAsLowerChar(c);
            }
            break;
        }
        buf[index] = toHankakuLower(c);
        index++;
    }
    if(!hasExponent){
        buf[index] = '\0';
        yylval.setReal(atof(buf));
        return REAL;
    }

    // 000.000e...  -- first exponent character: sign or digit
    c = reader->ReadAsLowerChar();
    if((c != JIS('+')) && (c != JIS('-')) && !isDIGIT(c)){
        throw SpException("illegal num format");
    }
    // The 'e' may have filled the last checked slot; apply the same limit
    // as every other write into buf before storing this character.
    if(index >= MaxBuf){
        throw SpException("too long num format");
    }
    buf[index] = toHankakuLower(c);
    index++;

    // remaining exponent digits
    while(true){
        if(index >= MaxBuf){
            throw SpException("too long num format");
        }
        c = reader->ReadAsLowerChar();
        if(!isDIGIT(c)){
            reader->UnreadAsLowerChar(c);
            break;
        }
        buf[index] = toHankakuLower(c);
        index++;
    }
    buf[index] = '\0';
    yylval.setReal(atof(buf));
    return REAL;
}

// Reads the character that follows a backslash and returns the character
// the escape stands for:
//   \r -> 0x0d (CR)
//   \n -> 0x0a (LF)
// Any other character is returned unchanged.
SpChar Lexer::lexBackslashChar()
{
    SpChar escaped = reader->ReadChar();

    if(escaped == JIS('r')){        // CR
        return MakeSpChar(CodeJIS, 0x0d);
    }
    if(escaped == JIS('n')){        // LF
        return MakeSpChar(CodeJIS, 0x0a);
    }
    return escaped;
}

int Lexer::lexString()
{
    SpCharVector vec;
    SpChar c;
    while(true){
        c = reader->ReadAsLowerChar();
        if(c == JIS('"')){
            break;
        }
        if(c == JIS('\\')){
            //c = reader->ReadChar();
            c = lexBackslashChar();
        }else{
            reader->UnreadAsLowerChar(c);
            c = reader->ReadChar();
        }
        vec.push_back(c);
    }
    yylval.setNewObject(new SpString(vec));
    return STRING;
}

int Lexer::lexChar()
{
    SpChar c = reader->ReadChar();
    if(toHankakuLower(c) == JIS('\\')){
        c = lexBackslashChar();
    }
    if(reader->ReadAsLowerChar() != JIS('\'')){
        throw SpException("illegal char format");
    }
    yylval.setSpChar(c);
    return CHAR;
}

// Looks the word in `vec` up in the reserved-word table.
// Returns the token code registered for it, or 0 when the word is not
// reserved.
int Lexer::isReserved(SpCharVector& vec)
{
    SpString word(vec);
    KeywordMap::iterator found = reserved.find(&word);
    if(found == reserved.end()){
        return 0; // not Reserved.
    }
    return found->second;
}

// True when c is one of the punctuation characters listed below.
// lex() uses this to decide where a symbol ends.
bool Lexer::isReservedKigou(SpChar c)
{
    static const SpChar kigou[] = {
        JIS('"'), JIS('\''), JIS('#'), JIS('$'),
        JIS('('), JIS(')'),  JIS('['), JIS(']'),
        JIS('{'), JIS('}'),  JIS(','), JIS('|'),
        JIS(';'), JIS('@'),  JIS('='), JIS('+'),
        JIS('-'), JIS('*'),  JIS('/'), JIS('%'),
        JIS('<'), JIS('>'),  JIS('!'), JIS('.'),
        JIS(':')
    };
    const unsigned int count = sizeof(kigou) / sizeof(kigou[0]);

    for(unsigned int k = 0; k < count; k++){
        if(c == kigou[k]){
            return true;
        }
    }
    return false;
}

int Lexer::lex()
{
    SpChar c;

    if(wdot_read){
        wdot_read = false;
        return WDOT;
    }

start:
    c = skipWhiteSpace();
    if(isEOF(c)){
        return EndOfStream;
    }

    SpCharVector vec;
    vec.push_back(c);
    if(isDIGIT(c)){
        if(c == JIS('0')){
            return lexZeroNumber();
        }else{
            reader->UnreadAsLowerChar(c);
            return lexNumber();
        }
    }
    switch(c){
    case JIS('"'):  // string
        return lexString();
    case JIS('\''): // char
        return lexChar();
    case JIS('#'):  // a line comment
        skipOneLine();
        goto start;
    case JIS('$'):
        return DOLL;
    case JIS('('):
        return LP;
    case JIS(')'):
        return RP;
    case JIS('['):
        return LB;
    case JIS(']'):
        return RB;
    case JIS('{'):
        return LC;
    case JIS('}'):
        return RC;
    case JIS(','):
        return COMMA;
    case JIS('|'):
        return BAR;
    case JIS(';'):
        return SEMI;
    case JIS('@'):
        return AT;
    case JIS('='):
        c = reader->ReadAsLowerChar();
        if(c == JIS('=')){
            return EQ;
        }else{
            reader->UnreadAsLowerChar(c);
        }
        return ASSIGN;
    case JIS('+'):
        return PLUS;
    case JIS('-'):
        return MINUS;
    case JIS('*'):
        return TIMES;
    case JIS('/'): /* "/", "//" */
        c = reader->ReadAsLowerChar();
        if(c == JIS('/')){
            return WSLASH;
        }else if(c == JIS('*')){
            skipComment();
            goto start;
        }else{
            reader->UnreadAsLowerChar(c);
        }
        return DIV;
    case JIS('%'):
        return MOD;
    case JIS('<'): /* "<", "<=", "<-" */
        c = reader->ReadAsLowerChar();
        if(c == JIS('=')){
            return LE;
        }else if(c == JIS('-')){
            return LARROW;
        }else{
            reader->UnreadAsLowerChar(c);
        }
        return LT;
    case JIS('>'): /* ">", ">=" */
        c = reader->ReadAsLowerChar();
        if(c == JIS('=')){
            return GE;
        }else{
            reader->UnreadAsLowerChar(c);
        }
        return GT;
    case JIS('!'): /* "!", "!=" */
        c = reader->ReadAsLowerChar();
        if(c == JIS('=')){
            return NE;
        }else{
            reader->UnreadAsLowerChar(c);
        }
        return NOT;
    case JIS('.'): /* ".", ".." */
        c = reader->ReadAsLowerChar();
        if(c == JIS('.')){
            return WDOT;
        }else{
            reader->UnreadAsLowerChar(c);
        }
        return DOT;
    case JIS(':'): /* ":", "::" */
        c = reader->ReadAsLowerChar();
        if(c == JIS(':')){
            return WCOLON;
        }else{
            reader->UnreadAsLowerChar(c);
        }
        return COLON;
    }

    // lex SYMBOL
    //  or Reserved
    //       true, false, nil
    //       fun, property, const
    //       if, loop, break, redo
    //       retry, let, private, publid
    //       or, and
    while(true){
        c = reader->ReadAsLowerChar();
        if(isWhiteSpace(c)){
            break;
        }
        if(isReservedKigou(c)){
            reader->UnreadAsLowerChar(c);
            break;
        }
        vec.push_back(c);
    }
    int p = isReserved(vec);
    if(p){
        return p;
    }
    yylval.setNewObject(new SpSymbol(vec));
    return SYMBOL;
}

// end -- class Lexer

int yylex()
{
    Lexer* aLexer = Lexer::getLexer();
    int i = aLexer->lex();
//cout << "lex: " << i << endl;
    return i;
}
