/*!
  \file
  \brief 日本語への変換

  \author Satofumi KAMIMURA

  $Id: convertToJp.cpp 1943 2011-03-01 01:11:26Z satofumi $
*/

#include "convertToJp.h"
#include "roman_table.h"
#include "kana_table.h"
#include "Utf8.h"
#include <cctype>
#include <cstring>
#include <vector>

using namespace qrk;
using namespace std;


namespace
{
    // NOTE: this helper should be shared with the equivalent function
    // used by convertToRoman() instead of being duplicated here.
    /*!
      \brief Length of a zero-terminated array of unsigned long

      \param[in] text first element of an array terminated by a 0x0 element

      \return number of elements that precede the terminating 0x0
    */
    size_t long_strlen(const unsigned long* text)
    {
        // Walk a cursor to the terminator; the distance is the length.
        const unsigned long* cursor = text;
        for (; *cursor != 0x0; ++cursor) {
        }
        return static_cast<size_t>(cursor - text);
    }
}


/*!
  \brief Convert typed text into Japanese using a conversion table

  The input is split into 16-bit elements through the Utf8 class and then
  scanned from the front. At every position the table entries are tried in
  table order, and the first entry whose pattern matches is replaced in
  place by its replacement text.

  Table layout, as used by this function:
  - every entry occupies (pattern_max_size * 2) elements,
  - the pattern starts at offset 0 and the replacement at offset
    pattern_max_size, both terminated by a 0x0 element,
  - the table ends at the first entry whose leading element is 0x0.

  \param[in] raw_text text to convert
  \param[in] is_roman true to use RomanTable, false to use KanaTable. When
                      true, input elements are lower-cased before they are
                      compared with a pattern.

  \return converted text. Elements whose upper byte is non-zero are written
          as the three bytes 0xe3, upper byte, lower byte; all other
          elements are written as a single byte.
*/
std::string qrk::convertToJp(const std::string& raw_text, bool is_roman)
{
    const unsigned long* table =
        (is_roman) ? &RomanTable[0][0][0] : &KanaTable[0][0][0];
    const int pattern_max_size = (is_roman) ?
        static_cast<int>(ROMAN_CONVERT_SIZE_MAX) : KANA_CONVERT_SIZE_MAX;

    // Copy the input into a vector so matched ranges can be resized in place.
    // NOTE(review): presumably Utf8::operator[] yields the same 16-bit
    // element format the tables use -- confirm against Utf8.h.
    vector<unsigned short> converted;
    Utf8 utf8_text(raw_text);
    const size_t raw_text_size = utf8_text.size();
    converted.reserve(raw_text_size);
    for (size_t i = 0; i < raw_text_size; ++i) {
        converted.push_back(utf8_text[i]);
    }

    // Compare and convert, starting from the first element.
    // After a replacement the scan resumes at match_start + 1, so the
    // trailing elements of a multi-element replacement are examined too.
    for (size_t match_start = 0;
         match_start < converted.size(); ++match_start) {
        for (const unsigned long* pattern = table;
             *pattern != 0x0; pattern += (pattern_max_size * 2)) {

            const size_t remaining = converted.size() - match_start;

            bool matched = true;
            size_t matched_size = 0;
            for (;; ++matched_size) {
                // The pattern element is compared as a 16-bit value.
                const unsigned short pattern_ch =
                    static_cast<unsigned short>(pattern[matched_size]);
                if (pattern_ch == 0x0) {
                    break;
                }

                // Check the bounds BEFORE reading the input element; a
                // pattern longer than the remaining input cannot match.
                if (matched_size >= remaining) {
                    matched = false;
                    break;
                }

                unsigned short input_ch = converted[match_start + matched_size];
                // tolower() is only defined for values representable as
                // unsigned char, so multi-byte elements are left untouched.
                if (is_roman && (input_ch <= 0xff)) {
                    input_ch =
                        static_cast<unsigned short>(std::tolower(input_ch));
                }
                if (pattern_ch != input_ch) {
                    matched = false;
                    break;
                }
            }
            if (!matched) {
                continue;
            }

            // The pattern matched: replace it.
            const unsigned long* replacement = pattern + pattern_max_size;
            const size_t replaced_size = long_strlen(replacement);

            // Resize the matched region to the replacement length first, so
            // that the copy below never touches elements outside the match
            // (handles both shrinking and growing replacements).
            if (replaced_size < matched_size) {
                converted.erase(converted.begin() + match_start + replaced_size,
                                converted.begin() + match_start + matched_size);
            } else if (replaced_size > matched_size) {
                converted.insert(converted.begin() + match_start + matched_size,
                                 replaced_size - matched_size,
                                 static_cast<unsigned short>(0));
            }

            for (size_t i = 0; i < replaced_size; ++i) {
                converted[match_start + i] =
                    static_cast<unsigned short>(replacement[i]);
            }

            // Only the first matching entry is applied at each position.
            break;
        }
    }

    // Serialise the elements back into a byte string.
    string converted_string;
    const size_t n = converted.size();
    for (size_t i = 0; i < n; ++i) {
        const unsigned short ch = converted[i];
        if (ch & 0xff00) {
            // Element holds the two trailing bytes of a three-byte sequence
            // whose leading byte is 0xe3.
            converted_string.push_back(static_cast<char>(0xe3));
            converted_string.push_back(static_cast<char>((ch >> 8) & 0xff));
            converted_string.push_back(static_cast<char>(ch & 0xff));
        } else {
            converted_string.push_back(static_cast<char>(ch));
        }
    }
    return converted_string;
}
