/*!
  \file
  \brief 日本語のローマ字文字列への変換

  \author Satofumi KAMIMURA

  $Id: convertToRoman.cpp 1823 2010-05-11 22:11:47Z satofumi $
*/

#include "convertToRoman.h"
#include "roman_table.h"
#include "Utf8.h"
#include <vector>
#include <cstring>

using namespace qrk;
using namespace std;


namespace
{
    // Returns the number of elements in a zero-terminated sequence of
    // unsigned long values; the terminating 0x0 element is not counted.
    size_t long_strlen(const unsigned long* text)
    {
        const unsigned long* cursor = text;
        for (; *cursor != 0x0; ++cursor) {
            // Nothing to do: just advance to the terminator.
        }
        return static_cast<size_t>(cursor - text);
    }


    bool isConvertedCorrect(const unsigned long* pattern,
                            const Utf8& jp_text, size_t used_jp_index)
    {
        size_t converted_size = long_strlen(pattern + ROMAN_CONVERT_SIZE_MAX);

        for (size_t i = 0; i < converted_size; ++i) {
            if (*(pattern + ROMAN_CONVERT_SIZE_MAX + i) !=
                jp_text.ch(used_jp_index + i)) {
                return false;
            }
        }

        return true;
    }


    // Consumes current_input from the front using the entries of RomanTable
    // and records what was accepted. Processing stops at the first position
    // where no table entry fits both the input and jp_text.
    //
    // created_jp_utf8 : [out] replaced by the kana covered by the accepted
    //                   part of the input.
    // created_roman   : [in,out] the romaji of every accepted entry is
    //                   appended to this string.
    // jp_text         : the Japanese text the input is being typed for.
    // current_input   : romaji typed so far; it may end in the middle of a
    //                   table entry.
    void convertUsingInput(Utf8& created_jp_utf8, string& created_roman,
                           const Utf8& jp_text, const string& current_input)
    {
        string created_jp;

        // Index in jp_text of the first character not yet covered by an
        // accepted entry. It has to advance with every accepted entry;
        // otherwise all later entries would be verified against the
        // beginning of jp_text instead of the current position.
        size_t used_jp_index = 0;

        const char* input = current_input.c_str();
        while (*input != '\0') {
            bool matched = false;

            // Each table entry is a romaji row followed, exactly
            // ROMAN_CONVERT_SIZE_MAX elements later, by the row it converts
            // to. The table ends at an entry whose romaji row is empty.
            const unsigned long* pattern = &RomanTable[0][0][0];
            for (; *pattern != 0x0; pattern += ROMAN_CONVERT_SIZE_MAX * 2) {
                // Length of the common prefix of the entry's romaji and the
                // remaining input. The input's '\0' never equals a romaji
                // element, so this never reads past the end of the input.
                size_t match_size = 0;
                for (; *(pattern + match_size) != 0x0; ++match_size) {
                    const unsigned long ch = input[match_size];
                    if (*(pattern + match_size) != ch) {
                        break;
                    }
                }

                // The input ended while it still agreed with the romaji:
                // treat the partially typed entry as a complete match.
                if ((match_size > 0) && (input[match_size] == '\0')) {
                    match_size = long_strlen(pattern);
                }

                if (*(pattern + match_size) != 0x0) {
                    // The romaji differs from the input.
                    continue;
                }
                if (! isConvertedCorrect(pattern, jp_text, used_jp_index)) {
                    // The romaji fits, but it does not produce the kana
                    // found at the current position of jp_text.
                    continue;
                }

                // Accept the entry. Every converted character is written as
                // three bytes: 0xe3 followed by the two low-order bytes of
                // the table value.
                for (const unsigned long* ch =
                         pattern + ROMAN_CONVERT_SIZE_MAX;
                     *ch != 0x0; ++ch) {
                    created_jp.push_back(0xe3);
                    created_jp.push_back((*ch >> 8) & 0xff);
                    created_jp.push_back(*ch & 0xff);
                    ++used_jp_index;
                }

                for (size_t i = 0; i < match_size; ++i) {
                    created_roman.push_back(*(pattern + i));
                }

                matched = true;

                // A partially typed entry is longer than what is left of
                // the input, so never step beyond the terminating '\0'.
                // The scan deliberately continues with the following
                // entries (as before); the outer loop restarts from the
                // first entry afterwards.
                input += min(match_size, strlen(input));
            }
            if (! matched) {
                break;
            }
        }

        created_jp_utf8 = Utf8(created_jp.c_str());
    }


    // Converts input to romaji using RomanTable, without reference to any
    // typed input.
    //
    // The characters of input are copied into a work buffer, which is then
    // scanned from its last position towards its first. At each position
    // the first table entry whose converted row ([1]) matches the buffer
    // there is replaced, in place, by the entry's romaji row ([0]). Since
    // the buffer is modified as the scan proceeds, entries tried further to
    // the left are compared against the already converted text on their
    // right.
    //
    // Returns the buffer as a string. Every element is narrowed to char, so
    // a character that no table entry replaced is emitted as its low byte.
    string convertLeftText(const Utf8& input)
    {
        // Copy the characters to convert into the work buffer.
        const size_t input_size = input.size();

        vector<unsigned long> converted;
        converted.reserve(input_size);
        for (size_t i = 0; i < input_size; ++i) {
            converted.push_back(input.ch(i));
        }

        for (int match_start = static_cast<int>(converted.size()) - 1;
             match_start >= 0; --match_start) {
            for (int pattern_index = 0;
                 RomanTable[pattern_index][1][0] != 0x0; ++pattern_index) {
                const unsigned long* roman =
                    &RomanTable[pattern_index][0][0];
                const unsigned long* kana =
                    &RomanTable[pattern_index][1][0];

                // Compare the entry's converted row with the buffer,
                // starting at match_start.
                const size_t current_size = converted.size();
                size_t matched_size = 0;
                bool matched = true;
                for (; kana[matched_size] != 0x0; ++matched_size) {
                    const size_t ch_index = match_start + matched_size;
                    if ((current_size <= ch_index) ||
                        (kana[matched_size] != converted[ch_index])) {
                        matched = false;
                        break;
                    }
                }
                if (! matched) {
                    continue;
                }

                // Replace the matched range with the romaji. The romaji may
                // be longer than the matched range (e.g. nba -> んば) and
                // erase + insert also copes with a romaji that is shorter;
                // computing "replaced - matched" as an unsigned value would
                // wrap around in that case.
                const size_t replaced_size = long_strlen(roman);
                vector<unsigned long>::iterator first =
                    converted.begin() + match_start;
                converted.erase(first, first + matched_size);
                converted.insert(converted.begin() + match_start,
                                 roman, roman + replaced_size);

                // Only the first matching entry is applied per position.
                break;
            }
        }

        string converted_string;
        const size_t n = converted.size();
        converted_string.reserve(n);
        for (size_t i = 0; i < n; ++i) {
            converted_string.push_back(static_cast<char>(converted[i]));
        }
        return converted_string;
    }
}


std::string qrk::convertToRoman(const std::string& jp_text,
                                const std::string& current_input)
{
    // 入力候補に対応するローマ字の文字列を生成する
    Utf8 created_jp("");
    string created_roman;
    convertUsingInput(created_jp, created_roman, Utf8(jp_text), current_input);

    // 残りのローマ字の文字列を生成する
    Utf8 jp_text_utf8(jp_text);
    size_t left_size = jp_text_utf8.size() - created_jp.size();
    Utf8 remain_jp_text = jp_text_utf8.substr(created_jp.size(), left_size);

    // 入力候補を使って変換できた文字列、を取り除いた後半の文字列に対して
    // ローマ字の文字列を末尾から評価して生成する
    return created_roman + convertLeftText(remain_jp_text);
}
