



#ifndef __UTF8_H__
#define __UTF8_H__

#include <sys/types.h>
#include <inttypes.h>

// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.


// Distinguished states of the decoding automaton. State values are row
// offsets into the 12-column transition table stored in utf8d (they are
// pre-multiplied by 12), so the reject row - the second row - is 12.
#define UTF8_ACCEPT 0
#define UTF8_REJECT 12

// Lookup table driving the UTF-8 decoding automaton. Layout:
//   [0 .. 255]  byte value -> character class (0 .. 11)
//   [256 .. ]   transition table, 12 entries (one per class) per state
// State values stored in the transition table are already multiplied by 12,
// so the next state is read as utf8d[256 + state + class].
static const uint8_t utf8d[] = {
  // The first part of the table maps bytes to character classes, which
  // reduces the size of the transition table; the class is also used as a
  // shift count to build the bitmask that extracts a lead byte's payload.
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,

  // The second part is a transition table that maps a combination
  // of a state of the automaton and a character class to a state.
  // Each group of 12 values is one state's row; the second row (state 12)
  // contains only 12s, so that state can never be left once entered.
   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
  12,36,12,12,12,12,12,12,12,12,12,12, 
};



// Decode a UTF-8 byte sequence, writing one output byte per decoded code
// point.
//
// buf, buf_size         - input bytes to decode.
// ret_buf, ret_buf_size - output buffer and its capacity in bytes.
//
// Every completed code point is stored as a single uint8_t, so only its low
// 8 bits are kept (code points above U+00FF are truncated).
// NOTE(review): presumably callers expect a single-byte (Latin-1 style)
// result; confirm before widening the output type.
//
// Decoding stops when the input is exhausted, when the output buffer is
// full, or when the automaton enters its reject state on malformed input.
// An incomplete sequence at the end of the input produces no output.
//
// Returns the number of bytes written to ret_buf, or 0 if either pointer is
// NULL or either size is 0.
static inline size_t
utf8_decode(const void *buf, size_t buf_size, void *ret_buf, size_t ret_buf_size) {
	// Row offset of the reject state in utf8d's transition table. That
	// row maps every class back to itself, so nothing more can be decoded
	// once it is reached.
	const uint32_t reject_state = 12;
	const uint8_t *src = (const uint8_t *)buf;
	uint8_t *dst = (uint8_t *)ret_buf;
	size_t i, ret_size = 0;
	uint32_t type, codep = 0, state = UTF8_ACCEPT;
	uint8_t byte;

	if (buf == NULL || buf_size == 0 || ret_buf == NULL || ret_buf_size == 0)
		return (0);

	for (i = 0; i < buf_size && ret_size < ret_buf_size; i ++) {
		byte = src[i];
		type = utf8d[byte];

		// Continuation bytes contribute their low 6 bits; a lead byte
		// starts a new code point, masked down to its payload bits.
		codep = (state != UTF8_ACCEPT) ?
			(byte & 0x3fu) | (codep << 6) :
			(0xffu >> type) & (byte);
		state = utf8d[256 + state + type];
		if (state == UTF8_ACCEPT) {
			// A full code point is available; keep its low byte.
			dst[ret_size] = (uint8_t)codep;
			ret_size ++;
		} else if (state == reject_state) {
			// Malformed input: no later byte can produce output.
			break;
		}
	}

	return (ret_size);
}



#endif /* __UTF8_H__ */
