/*
 * Copyright (C) 2009 by Aiwota Programmer
 * aiwotaprog@tetteke.tk
 *
 * This file is part of Dialektos.
 *
 * Dialektos is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Dialektos is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Dialektos.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "cp932.hxx"

#include <boost/foreach.hpp>
#include <boost/format.hpp>
#include <string>
#include <sstream>


namespace dialektos {

namespace convert {


// Builds a converter whose output buffer is empty and which has no
// double-byte lead byte pending (operator() treats lead_ == 0 as "none").
CP932toUTF8::CP932toUTF8()
    : CP932Table(),
      buffer_(),
      lead_(0)
{}

// Encodes one 16-bit code point as UTF-8 and appends the bytes to buffer_.
//
//   bmp <= 0x007f  -> 1 byte  (ASCII, copied as-is)
//   bmp <= 0x07ff  -> 2 bytes
//   larger values  -> 3 bytes
void CP932toUTF8::bmp_to_buffer(unsigned short bmp) {
  if (bmp <= 0x007f) {
    // ascii
    buffer_ += char(bmp);
    return;
  }

  // Every multi-byte form ends with the low six bits in a 10zzzzzz byte.
  const char last = (bmp & 0x3f) | 0x80;

  if (bmp <= 0x07ff) {
    // 00000xxxxxyyyyyy -> 110xxxxx 10yyyyyy
    const char first = (bmp >> 6) | 0xc0;
    buffer_ += first;
    buffer_ += last;
    return;
  }

  // xxxxyyyyyyzzzzzz -> 1110xxxx 10yyyyyy 10zzzzzz
  const char first = (bmp >> 12) | 0xe0;
  const char middle = ((bmp >> 6) & 0x3f) | 0x80;
  buffer_ += first;
  buffer_ += middle;
  buffer_ += last;
}

// Converts a single-byte CP932 character and appends the result to buffer_.
//
// to_bmp() is asked for the code point of `ch`; a result of 0 is treated as
// "no mapping", in which case the byte is appended in escaped form instead.
// NOTE(review): to_bmp() is not defined in this file - presumably inherited
// from CP932Table; confirm against cp932.hxx.
void CP932toUTF8::cp932_to_buffer(unsigned char ch) {
  const unsigned short code = to_bmp(ch);
  if (code != 0) {
    bmp_to_buffer(code);
    return;
  }
  unknown_to_buffer(ch);
}

// Converts a double-byte CP932 character (lead byte `ch1`, trail byte `ch2`)
// and appends the result to buffer_.
//
// to_bmp() is asked for the code point of the pair; a result of 0 is treated
// as "no mapping", in which case both bytes are appended in escaped form.
// NOTE(review): to_bmp() is not defined in this file - presumably inherited
// from CP932Table; confirm against cp932.hxx.
void CP932toUTF8::cp932_to_buffer(unsigned char ch1, unsigned char ch2) {
  const unsigned short code = to_bmp(ch1, ch2);
  if (code != 0) {
    bmp_to_buffer(code);
    return;
  }
  unknown_to_buffer(ch1, ch2);
}

// Appends a textual escape for one unconvertible byte to buffer_: a
// backslash, the letter 'x' and the byte value as two lowercase hex digits
// (for example the byte 0x80 becomes the four characters \x80).
void CP932toUTF8::unknown_to_buffer(unsigned char ch) {
  // The byte is widened to int so that it is formatted as a number.
  buffer_ += (boost::format("\\x%02x") % int(ch)).str();
}

// Appends textual escapes for an unconvertible byte pair to buffer_: each
// byte is written as a backslash, the letter 'x' and two lowercase hex
// digits, `ch1` first and `ch2` second.
void CP932toUTF8::unknown_to_buffer(unsigned char ch1, unsigned char ch2) {
  // Both bytes are widened to int so that they are formatted as numbers.
  buffer_ += (boost::format("\\x%02x\\x%02x") % int(ch1) % int(ch2)).str();
}

// Feeds `input` (CP932 bytes) through the converter and returns the contents
// of buffer_ afterwards.
//
// Byte classes handled:
//   0x00-0x7f              ASCII, copied unchanged
//   0x81-0x9f, 0xe0-0xfc   lead byte of a double-byte character
//   0xa1-0xdf              half-width katakana (single byte)
//   0x80, 0xa0, 0xfd-0xff  undefined, written in escaped form
//
// A lead byte is remembered in lead_ and combined with whatever byte comes
// next. If `input` ends on a lead byte, it stays pending in lead_ and is
// paired with the first byte of the next call on the same object.
//
// NOTE(review): nothing in this file clears buffer_, so the returned string
// also contains the output of earlier calls on the same object - confirm
// that callers expect this.
std::string CP932toUTF8::operator()(const std::string& input) {
  for (std::string::const_iterator it = input.begin();
       it != input.end(); ++it) {
    const unsigned char byte = *it;

    // A pending lead byte consumes the current byte as its trail byte.
    if (lead_ > 0) {
      cp932_to_buffer(lead_, byte);
      lead_ = 0;
      continue;
    }

    // dbcs lead byte: remember it and wait for the trail byte
    const bool is_lead = (byte >= 0x81 && byte <= 0x9f) ||
                         (byte >= 0xe0 && byte <= 0xfc);
    if (is_lead) {
      lead_ = byte;
      continue;
    }

    // Every remaining class is a complete single-byte unit.
    lead_ = 0;
    if (byte <= 0x7f) {
      // ascii
      buffer_ += byte;
    } else if (byte >= 0xa1 && byte <= 0xdf) {
      // half width katakana
      cp932_to_buffer(byte);
    } else {
      // undefined: 0x80, 0xa0, 0xfd-0xff
      unknown_to_buffer(byte);
    }
  }

  return buffer_;
}

// One-shot convenience wrapper: runs `input` through a freshly constructed
// CP932toUTF8 converter and returns what its operator() returns.
std::string cp932(const std::string& input) {
  return CP932toUTF8()(input);
}


} // namespace convert

} // namespace dialektos
