#define NOMINMAX

#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <set>
#include <string>
#include <vector>

#include <windows.h>
#include <process.h>

#include "v8.h"
#include "v8stdint.h"

#include "libxml/HTMLParser.h"
#include "libxml/HTMLTree.h"
#include "libxml/uri.h"

#include "mecab.h"
#include "scoped_ptr.h"

#include "sqlite3.h"
#include "fts3_tokenizer.h"

#define SNIPPET_SIZE 400

// Scope guard for a libxml2 document: frees the wrapped xmlDocPtr with
// xmlFreeDoc() when the guard is destroyed. The implicit conversion lets the
// guard be passed wherever an xmlDocPtr is expected.
struct xmlDocRAII{
	xmlDocPtr ptr;

	xmlDocRAII(xmlDocPtr ptr_) : ptr(ptr_) {}

	~xmlDocRAII(){
		if (!ptr) return;
		xmlFreeDoc(ptr);
	}

	operator xmlDocPtr(){ return ptr; }

private:
	// Default construction, copying and assignment are declared private so
	// that two guards can never own the same document.
	xmlDocRAII();
	xmlDocRAII(const xmlDocRAII &);
	xmlDocRAII &operator =(const xmlDocRAII &);
};

// Scope guard for a string allocated by libxml2: releases the wrapped pointer
// with xmlFree() when the guard is destroyed.
struct xmlRAII{
	void *ptr;

	xmlRAII(xmlChar * ptr_) : ptr(ptr_) {}

	~xmlRAII(){
		if (!ptr) return;
		xmlFree(ptr);
	}

	operator void *(){ return ptr; }

private:
	// Default construction, copying and assignment are declared private so
	// that the buffer is freed exactly once.
	xmlRAII();
	xmlRAII(const xmlRAII &);
	xmlRAII&operator =(const xmlRAII &);
};

void download(const char *url, std::vector<uint8_t> &data);

// Returns a lower-cased copy of text. The cast to unsigned char keeps
// std::tolower well-defined for bytes >= 0x80.
static std::string parse_url_to_lower(std::string text){
	for (std::size_t i = 0; i < text.size(); ++i){
		text[i] = static_cast<char>(std::tolower(static_cast<unsigned char>(text[i])));
	}
	return text;
}

// Normalizes a slash-separated URL/path: lower-cases it, drops "./" segments,
// resolves "../" against the preceding segment and strips any "#fragment".
//
// url_toparse: the URL or path to normalize.
// url_base:    directory prefix that "../" must not climb above; a "../" met
//              while the result equals url_base (compared case-insensitively,
//              because the result is lower-cased) is dropped.
// Returns the normalized string. A "../" with no preceding segment to remove
// is kept literally, and a kept "../" is never cancelled by a later "../".
std::string parse_url(const std::string &url_toparse, const std::string &url_base = ""){
	const std::size_t np = std::string::npos;
	const std::string base_lower = parse_url_to_lower(url_base);
	std::string url_result;
	// Offset in url_result where the most recently kept segment starts,
	// or np when there is no segment that "../" could remove.
	std::size_t url_result_prevpos = np;
	std::size_t curpos = 0;

	while (curpos < url_toparse.size()){
		// A segment runs up to and including the next '/', or to the end of the input.
		const std::size_t nextpos = url_toparse.find('/', curpos);
		const std::string url_part = parse_url_to_lower(
			url_toparse.substr(curpos, nextpos == np ? np : nextpos - curpos + 1));

		if (url_part == "./"){
			// Current-directory segment: contributes nothing.
		}else if (url_part == "../" && url_result_prevpos != np &&
				url_result.compare(url_result_prevpos, np, "../") != 0){
			if (url_result != base_lower){
				// Drop the last kept segment, then locate the start of the one before it.
				url_result.erase(url_result_prevpos);
				if (url_result.empty()){
					url_result_prevpos = np;
				}else{
					// Every remaining segment ends with '/', so search in front of the trailing slash.
					const std::size_t slash = (url_result.size() >= 2)
						? url_result.rfind('/', url_result.size() - 2) : np;
					url_result_prevpos = (slash == np) ? 0 : slash + 1;
				}
			}
		}else{
			url_result_prevpos = url_result.size();
			url_result += url_part;
		}

		if (nextpos == np) break;
		curpos = nextpos + 1;
	}

	const std::size_t sharp = url_result.find('#');
	if (sharp != np) return url_result.substr(0, sharp);
	return url_result;
}

// ==== Functions and structures used to register the MeCab tokenizer with SQLite3
namespace MecabTokenizer{
#if 0
// Variant of the tokenizer state built on the mecab_t handle.
// It sits in the "#if 0" branch, so it is not compiled.
struct Tokenizer{
	sqlite3_tokenizer base;	// must stay first: pointers are cast to/from sqlite3_tokenizer*
	mecab_t *mecab;	// handle created with mecab_new() and released with mecab_destroy()
};
// Per-scan cursor for the variant above.
struct Cursor{
	sqlite3_tokenizer_cursor base;	// must stay first: pointers are cast to/from sqlite3_tokenizer_cursor*
	const mecab_node_t *node;	// next node to report
	char *buf;	// malloc'ed scratch buffer holding the current token text
	int buflen, offset, pos;	// buffer capacity, running byte offset, token ordinal
};

// Allocates a Tokenizer and creates its mecab_t from the given arguments.
// Returns SQLITE_OK and stores the tokenizer in *otokbase, SQLITE_NOMEM when
// the allocation fails, or SQLITE_ERROR when mecab_new() fails.
// (Inside the "#if 0" branch; not compiled.)
int create(int argc, const char * const *argv, sqlite3_tokenizer **otokbase){
	int rc = SQLITE_OK;

	Tokenizer *tok = static_cast<Tokenizer *>(std::calloc(1, sizeof(Tokenizer)));
	if (!tok){ rc = SQLITE_NOMEM; goto ONERROR;};
	tok->mecab = mecab_new(argc, const_cast<char **>(argv));
	if (!tok->mecab) { rc = SQLITE_ERROR; goto ONERROR;};

	*otokbase = reinterpret_cast<sqlite3_tokenizer *>(tok);
	return rc;

ONERROR:
	// free(NULL) is a no-op, so this is safe when calloc itself failed.
	std::free(tok);
	return rc;
}

// Destroys the mecab_t handle and frees the Tokenizer allocated by create().
// Always returns SQLITE_OK. (Inside the "#if 0" branch; not compiled.)
// NOTE(review): tokbase is dereferenced without a NULL check.
int destroy( sqlite3_tokenizer *tokbase ){
	Tokenizer *tok = reinterpret_cast<Tokenizer *>(tokbase);
	mecab_destroy(tok->mecab);
	std::free(tok);
	return SQLITE_OK;
}

// Starts tokenizing input: allocates a Cursor with a 256-byte token buffer and
// asks MeCab for the node list. Returns SQLITE_OK with the cursor in *ocur,
// or SQLITE_NOMEM / SQLITE_ERROR with *ocur set to 0.
// (Inside the "#if 0" branch; not compiled.)
// NOTE(review): len is ignored and std::strlen(input)+1 is used instead, so
// input is assumed to be NUL-terminated.
int open( sqlite3_tokenizer *tokbase, const char *input, int len, sqlite3_tokenizer_cursor **ocur ) {
	static const int DEFAULT_CURSOR_BUF = 256;
	int rc = SQLITE_OK;

	Tokenizer *tok = reinterpret_cast<Tokenizer *>(tokbase);
	Cursor *cur = static_cast<Cursor *>(std::calloc(1, sizeof(Cursor)));
	if (!cur){ rc = SQLITE_NOMEM; goto ONERROR;};
	cur->buf = static_cast<char *>(std::malloc(DEFAULT_CURSOR_BUF));
	if (!cur->buf){ rc = SQLITE_NOMEM; goto ONERROR;};
	cur->buflen = DEFAULT_CURSOR_BUF;
	cur->offset = cur->pos = 0;
	cur->node = mecab_sparse_tonode2(tok->mecab, input, std::strlen(input)+1);
	if (!cur->node) { rc = SQLITE_ERROR; goto ONERROR;};

	*ocur = reinterpret_cast<sqlite3_tokenizer_cursor *>(cur);
	return rc;

ONERROR:
	// cur was calloc'ed, so cur->buf is NULL if its allocation never happened.
	if (cur){
		std::free(cur->buf);
		std::free(cur);
	}
	*ocur = 0;
	return rc;
}

// Frees the cursor and its token buffer. Always returns SQLITE_OK.
// (Inside the "#if 0" branch; not compiled.)
int close( sqlite3_tokenizer_cursor *curbase ){
	Cursor *cur = reinterpret_cast<Cursor *>(curbase);
	std::free(cur->buf);
	std::free(cur);
	return SQLITE_OK;
}

// Reports the next token: copies the node's surface text into the cursor
// buffer and fills in the token pointer, length, byte offsets and position.
// Returns SQLITE_OK after advancing, or SQLITE_DONE when the node has no successor.
// (Inside the "#if 0" branch; not compiled.)
int next( sqlite3_tokenizer_cursor *curbase, const char **otok, int *olen, int *ostartoffset, int *oendoffset, int *opos){
	Cursor *cur = reinterpret_cast<Cursor *>(curbase);

	const mecab_node_t *node = cur->node;

	// Skip nodes whose length is 0.
	while(node->next && node->length == 0) node = node->next;

	// Grow the buffer so that it is at least as large as the node.
	// NOTE(review): the test is "length > buflen" and buflen is set to length
	// rather than length + 1, so a node of exactly buflen bytes makes the
	// terminator below land one byte past the buffer. The realloc result is
	// not checked either.
	if (node->length > cur->buflen){
		cur->buf = static_cast<char *>(std::realloc(cur->buf, node->length + 1));
		cur->buflen = node->length;
	}

	// Copy the node's text into the cursor buffer.
	std::strncpy(cur->buf, node->surface, node->length);
	cur->buf[node->length] = '\0';

	// Hand back the information of the node the cursor currently holds.
	*otok = cur->buf;
	*olen = node->length;
	*ostartoffset = cur->offset;
	*oendoffset = cur->offset + node->length;
	*opos = cur->pos++;

	if (!node->next) return SQLITE_DONE;

	// Advance the cursor to the next node.
	cur->node = node->next;
	cur->offset += node->rlength;

	return SQLITE_OK;
}
#else
// Tokenizer state for the MeCab::Tagger based implementation.
// NOTE(review): top, token and token_count describe the scan in progress but
// live here rather than in Cursor, so they are shared by every cursor opened
// on the same tokenizer.
struct Tokenizer{
	sqlite3_tokenizer base;	// must stay first: pointers are cast to/from sqlite3_tokenizer*
	MeCab::Tagger *tagger;	// created in create(), released in destroy()
	const char *top;	// surface pointer of the first node; token offsets are measured from it
	std::vector<char> token;	// scratch buffer holding the NUL-terminated current token
	int token_count;	// ordinal of the next token, reset by open()
};
// Per-scan cursor: remembers the next MeCab node to report.
struct Cursor{
	sqlite3_tokenizer_cursor base;	// must stay first: pointers are cast to/from sqlite3_tokenizer_cursor*
	const MeCab::Node *node;
};

int create(int argc, const char * const *argv, sqlite3_tokenizer **otokbase){
	int rc = SQLITE_OK;

	Tokenizer *tok = new Tokenizer();
	tok->tagger = MeCab::createTagger(argc, const_cast<char **>(argv));
	if (!tok->tagger) { rc = SQLITE_ERROR; goto ONERROR;};

	*otokbase = reinterpret_cast<sqlite3_tokenizer *>(tok);
	return rc;

ONERROR:
	delete tok;
	return rc;
}

// Releases the tagger and the Tokenizer created by create().
// A null tokbase is accepted. Always returns SQLITE_OK.
int destroy( sqlite3_tokenizer *tokbase ){
	Tokenizer *tokenizer = reinterpret_cast<Tokenizer *>(tokbase);
	if (tokenizer != 0){
		MeCab::deleteTagger(tokenizer->tagger);
		delete tokenizer;
	}
	return SQLITE_OK;
}

// Starts tokenizing a buffer with MeCab.
//
// tokbase: tokenizer returned by create().
// input:   text to tokenize; only NUL-terminated when len is negative.
// len:     number of bytes in input, or a negative value meaning "up to the NUL".
// ocur:    receives the new cursor, or 0 on failure.
// Returns SQLITE_OK on success, SQLITE_ERROR when MeCab yields no node list.
int open( sqlite3_tokenizer *tokbase, const char *input, int len, sqlite3_tokenizer_cursor **ocur ) {
	Tokenizer *tok = reinterpret_cast<Tokenizer *>(tokbase);
	*ocur = 0;

	// Treat a missing buffer as empty text instead of dereferencing it.
	if (!input){
		input = "";
		len = 0;
	}
	// Honour the caller-supplied length: the buffer need not be NUL-terminated
	// when len >= 0, so it must not be scanned past len bytes.
	const size_t input_len = (len < 0) ? std::strlen(input) : static_cast<size_t>(len);

	Cursor *cur = new Cursor();
	cur->node = tok->tagger->parseToNode(input, input_len);
	if (!cur->node){
		delete cur;
		return SQLITE_ERROR;
	}

	// Offsets reported by next() are measured from the first node's surface.
	tok->top = cur->node->surface;
	tok->token_count = 0;

	*ocur = reinterpret_cast<sqlite3_tokenizer_cursor *>(cur);
	return SQLITE_OK;
}

// Destroys a cursor created by open(). Always returns SQLITE_OK.
int close( sqlite3_tokenizer_cursor *curbase ){
	delete reinterpret_cast<Cursor *>(curbase);
	return SQLITE_OK;
}

// Reports the token held by the cursor's current node and advances the cursor.
//
// otok/olen:                 receive the NUL-terminated token text and its byte length.
// ostartoffset/oendoffset:   receive the token's byte range, measured from Tokenizer::top.
// opos:                      receives the token's ordinal.
// Returns SQLITE_OK when the cursor moved on to a following node, SQLITE_DONE
// when the reported node has no successor.
int next( sqlite3_tokenizer_cursor *curbase, const char **otok, int *olen, int *ostartoffset, int *oendoffset, int *opos){
	Cursor *cursor = reinterpret_cast<Cursor *>(curbase);
	Tokenizer *owner = reinterpret_cast<Tokenizer *>(cursor->base.pTokenizer);

	// Skip nodes whose length is 0 (as long as something follows them).
	const MeCab::Node *current = cursor->node;
	for (; current->next && current->length == 0; current = current->next){}

	// Copy the node's text into the tokenizer's scratch buffer and terminate it.
	std::vector<char> &scratch = owner->token;
	scratch.resize(current->length+1);
	std::strncpy(&scratch[0], current->surface, current->length);
	scratch[current->length] = '\0';

	// Describe the token to the caller.
	*otok = &scratch[0];
	*olen = current->length;
	*ostartoffset = current->surface - owner->top;
	*oendoffset = *ostartoffset + current->length;
	*opos = owner->token_count++;

	// Move on to the following node, if there is one.
	if (current->next){
		cursor->node = current->next;
		return SQLITE_OK;
	}
	return SQLITE_DONE;
}
#endif
// Tokenizer module table handed to SQLite by register_mecab(): a leading 0
// followed by the five callbacks defined in this namespace.
const sqlite3_tokenizer_module mod = {
    0,
	MecabTokenizer::create,
    MecabTokenizer::destroy,
    MecabTokenizer::open,
    MecabTokenizer::close,
    MecabTokenizer::next,
};
}
// ====

// Registers MecabTokenizer::mod with SQLite under the name "mecab" by running
// "SELECT fts3_tokenizer(?, ?)", then runs "SELECT fts3_tokenizer(?)" to look
// the name up again.
//
// db: open database connection.
// Returns SQLITE_OK on success; otherwise the result code of the first call
// that failed. Prepared statements are finalized on every path.
// NOTE(review): recent SQLite builds may refuse the two-argument
// fts3_tokenizer() unless it has been enabled for the connection - confirm
// against the SQLite version in use.
int register_mecab(sqlite3 *db){
	// The module pointer itself is passed as the blob; it must outlive the step below.
	const sqlite3_tokenizer_module *module_ptr = &MecabTokenizer::mod;
	sqlite3_stmt *stmt = 0;
	int rc;

	// Register the module.
	rc = sqlite3_prepare_v2(db, "SELECT fts3_tokenizer(?, ?)", -1, &stmt, 0);
	if (rc == SQLITE_OK) rc = sqlite3_bind_text(stmt, 1, "mecab", -1, SQLITE_STATIC);
	if (rc == SQLITE_OK) rc = sqlite3_bind_blob(stmt, 2, &module_ptr, sizeof(module_ptr), SQLITE_STATIC);
	if (rc == SQLITE_OK) rc = sqlite3_step(stmt);
	if (rc != SQLITE_ROW){
		sqlite3_finalize(stmt);	// harmless when stmt is still 0
		return rc;
	}
	rc = sqlite3_finalize(stmt);
	stmt = 0;
	if (rc != SQLITE_OK) return rc;

	// Query the registration back.
	rc = sqlite3_prepare_v2(db, "SELECT fts3_tokenizer(?)", -1, &stmt, 0);
	if (rc == SQLITE_OK) rc = sqlite3_bind_text(stmt, 1, "mecab", -1, SQLITE_STATIC);
	if (rc == SQLITE_OK) rc = sqlite3_step(stmt);
	if (rc != SQLITE_ROW){
		sqlite3_finalize(stmt);
		return rc;
	}
	return sqlite3_finalize(stmt);
}

void parse_pdf(const char *url_pdf, std::string &content);

void parse_and_create_db(char *url_start, char *db_name){
	std::vector<char> filebuf;

	int rc;

	// db쐬
	char *rstr = 0;
	sqlite3 *db = 0;
	sqlite3_open_v2(db_name, &db, SQLITE_OPEN_CREATE | SQLITE_OPEN_READWRITE, 0);

	rc = register_mecab(db);

	// e[u쐬
	rc = sqlite3_exec(db,
		"CREATE VIRTUAL TABLE tbl USING FTS3 ("
		"  url TEXT, title TEXT, str TEXT, tokenize mecab '-d' 'sys.dic' '-r' 'dicrc' '-Owakati' )",
		0, 0, &rstr);

	// parseJn
	std::set<std::string> searched_url;
	sqlite3_exec(db, "BEGIN;", 0, 0, &rstr);

	sqlite3_stmt *stmt_insert = 0;
	rc = sqlite3_prepare_v2(db, "INSERT INTO tbl VALUES (?, ?, ?)", -1, &stmt_insert, 0);
	if (rc != SQLITE_OK) return;


#if _WIN32
	std::string url_bs2s = url_start;
	for (size_t i = 0; i < url_bs2s.size(); ++i){
		if (url_bs2s[i] == '\\') url_bs2s[i] = '/';
	}
#else
	std::string url_bs2s = url_start;
#endif
	std::vector<std::pair<std::string, std::string> > urles_toparse;

	// p[XΏۂ̃fBNg
	std::size_t sp = url_bs2s.find_last_of("/");
	std::string url_base;
	if (sp != std::string::npos){
		url_base = url_bs2s.substr(0, sp+1);
		urles_toparse.push_back(std::make_pair(url_base, url_bs2s.substr(sp+1)));
	}else{
		urles_toparse.push_back(std::make_pair("", url_bs2s));
	}

	char *mecab_argv[] = {
		"-d", "sys.dic", "-r", "dicrc", "-Owakati"
	};
	MeCab::scoped_ptr<MeCab::Tagger> tagger(MeCab::createTagger(5, mecab_argv));

//	int count = 0;
	while(urles_toparse.size()){
//		++count;
//		if (count == 50) break;
		std::string url_toparse = urles_toparse.back().second;
		std::string curdir = urles_toparse.back().first;

		urles_toparse.resize(urles_toparse.size()-1);

		// p[XΏۂ̃fBNgύX
		size_t pos = url_toparse.find_last_of("/");
		if (pos != std::string::npos){
			curdir = parse_url(curdir + url_toparse.substr(0, pos+1), url_base);
			url_toparse = curdir + url_toparse.substr(pos + 1);
		}else{
			url_toparse = curdir + url_toparse;
		}
		if (searched_url.find(url_toparse) != searched_url.end()){
//			printf("==== %s parsed\n", url_toparse.c_str());
			continue;
		}

		xmlDocRAII htmlDoc = htmlParseFile(url_toparse.c_str(), 0);
		if (!htmlDoc.ptr) continue;

//		std::printf("==== %s parsing\n", url_toparse.c_str());
		std::fprintf(stderr, "==== %s parsing\n", url_toparse.c_str());
		searched_url.insert(url_toparse);

		htmlNodePtr node = xmlDocGetRootElement(htmlDoc);
		std::vector<htmlNodePtr> nodeStack;
		nodeStack.push_back(node);

		std::string title, content;
		// ehtmlp[X
		while(nodeStack.size()){
			htmlNodePtr node = nodeStack.back();
			nodeStack.resize(nodeStack.size()-1);
			if (node->type == XML_TEXT_NODE){
				xmlChar *s = xmlNodeListGetString(htmlDoc, node, 1);
				xmlRAII xr(s);
				std::string piece = reinterpret_cast<const char *>(s);
				for (; *s; ++s){
					if (s[0] == 0xc2 && s[1] == 0x80) content += ' ', ++s;
					else if (s[0] >= 0x80 || std::isprint(s[0])) content += static_cast<char>(*s);
				}
				if (xmlStrcasecmp(node->parent->name, reinterpret_cast<const xmlChar *>("TITLE")) == 0){
					title = piece;
				}
			}else{
				if (node->type == XML_ELEMENT_NODE && xmlStrcasecmp(node->name, reinterpret_cast<const xmlChar *>("A")) == 0){
					for (xmlAttrPtr attr = node->properties; attr; attr = attr->next){
						if (xmlStrcasecmp(attr->name, reinterpret_cast<const xmlChar *>("HREF"))) continue;
						if (std::strchr(reinterpret_cast<const char *>(attr->children->content), ':')) continue;
						urles_toparse.push_back(
							std::make_pair(curdir, reinterpret_cast<const char *>(attr->children->content)));
						std::size_t sharp = urles_toparse.back().second.find_first_of("#");
						if (sharp != std::string::npos) urles_toparse.back().second = urles_toparse.back().second.substr(0, sharp);
					}
				}
			}
			if (node->next) nodeStack.push_back(node->next);
			if (node->children) nodeStack.push_back(node->children);
		}
//		printf("%s => \n", content.c_str());
		std::fprintf(stderr, "  title:%s\n", title.c_str());
		rc = sqlite3_reset(stmt_insert);
		std::string url_to_db = (url_base == url_toparse.substr(0, url_base.size())) ? url_toparse.substr(url_base.size()) : url_toparse;
		rc = sqlite3_bind_text(stmt_insert, 1, url_to_db.c_str(), -1, SQLITE_TRANSIENT);
		rc = sqlite3_bind_text(stmt_insert, 2, title.c_str(), -1, SQLITE_TRANSIENT);
		rc = sqlite3_bind_text(stmt_insert, 3, content.c_str(), -1, SQLITE_TRANSIENT);
		rc = sqlite3_step(stmt_insert);
					
//		printf(" - %s\n", tagger->parse(content.c_str()));
	}

	xmlCleanupParser();
	xmlCleanupCharEncodingHandlers();
	sqlite3_finalize(stmt_insert);
	sqlite3_exec(db, "commit;", 0, 0, &rstr);
	sqlite3_close(db);
}

void parse_and_create_db_from_urllist(char *urllist, char *db_name){
	std::vector<char> filebuf;

	int rc;

	// db쐬
	char *rstr = 0;
	sqlite3 *db = 0;
	sqlite3_open_v2(db_name, &db, SQLITE_OPEN_CREATE | SQLITE_OPEN_READWRITE, 0);

	rc = register_mecab(db);

	// e[u쐬
	rc = sqlite3_exec(db,
		"CREATE VIRTUAL TABLE tbl USING FTS3 ("
		"  url TEXT, title TEXT, str TEXT, tokenize mecab '-d' 'sys.dic' '-r' 'dicrc' '-Owakati' )",
		0, 0, &rstr);

	// parseJn
	std::set<std::string> searched_url;
	sqlite3_exec(db, "BEGIN;", 0, 0, &rstr);

	sqlite3_stmt *stmt_insert = 0;
	rc = sqlite3_prepare_v2(db, "INSERT INTO tbl VALUES (?, ?, ?)", -1, &stmt_insert, 0);
	if (rc != SQLITE_OK) return;

	// URLXg擾
	std::vector<std::pair<std::string, std::string> > urles_toparse;
	{
		std::vector<char> buf;
		// Xgt@CTCY擾
		size_t sz;
		FILE *fp = std::fopen(urllist, "r");
		std::fseek(fp, 0, SEEK_END);
		sz = std::ftell(fp);
		std::rewind(fp);
		if (sz > 1024 * 1024) sz = 1024 * 1024;
		buf.resize(sz);
		bool first_line = true;
		while(!std::feof(fp)){
			buf[0] = '\0';
			if (!std::fgets(&buf[0], static_cast<int>(buf.size()), fp)) continue;
			if (buf[0] == '\0') continue;
			const char *title = "", *url = &buf[0];
			if (first_line){
				first_line = false;
				// BOM΂
				if (static_cast<unsigned char>(url[0]) == 0xEF &&
					static_cast<unsigned char>(url[1]) == 0xBB &&
					static_cast<unsigned char>(url[2]) == 0xBF) url += 3;
			}
			for (size_t i = 0; i < buf.size(); ++i){
				if (buf[i] == '\t'){
					title = &buf[i+1], buf[i] = '\0';
					break;
				}
			}
			urles_toparse.push_back(std::make_pair(std::string(url), std::string(title)));
		}
	}

	char *mecab_argv[] = {
		"-d", "sys.dic", "-r", "dicrc", "-Owakati"
	};
	MeCab::scoped_ptr<MeCab::Tagger> tagger(MeCab::createTagger(5, mecab_argv));

	for (size_t k = 0; k < urles_toparse.size(); ++k){
		std::string &url_toparse = urles_toparse[k].first;
		std::string title = "", content;

		std::fprintf(stderr, "==== %s parsing\n", url_toparse.c_str());
		if (url_toparse.find(".pdf") != std::string::npos){
			parse_pdf(url_toparse.c_str(), content);
			title = urles_toparse[k].second;
			if (title == ""){
				title = url_toparse.substr(url_toparse.find_last_of('/') + 1);
			}
		}else{
//			xmlDocRAII htmlDoc = htmlParseFile(filename, 0);
			std::vector<uint8_t> htmlData;
			download(url_toparse.c_str(), htmlData);
			xmlDocRAII htmlDoc = htmlParseDoc(&htmlData[0], 0);
			if (!htmlDoc.ptr) continue;

			searched_url.insert(url_toparse);

			htmlNodePtr node = xmlDocGetRootElement(htmlDoc);
			std::vector<htmlNodePtr> nodeStack;
			nodeStack.push_back(node);

			// htmlp[X
			while(nodeStack.size()){
				htmlNodePtr node = nodeStack.back();
				nodeStack.resize(nodeStack.size()-1);
				if (node->type == XML_TEXT_NODE){
					xmlChar *s = xmlNodeListGetString(htmlDoc, node, 1);
					xmlRAII xr(s);
					std::string piece = reinterpret_cast<const char *>(s);
					for (; *s; ++s){
						if (s[0] == 0xc2 && s[1] == 0x80) content += ' ', ++s;
						else if (s[0] >= 0x80 || std::isprint(s[0])) content += static_cast<char>(*s);
					}
					if (xmlStrcasecmp(node->parent->name, reinterpret_cast<const xmlChar *>("TITLE")) == 0){
						title = piece;
					}
#if 0
				}else{
					if (node->type == XML_ELEMENT_NODE && xmlStrcasecmp(node->name, reinterpret_cast<const xmlChar *>("A")) == 0){
						for (xmlAttrPtr attr = node->properties; attr; attr = attr->next){
							if (xmlStrcasecmp(attr->name, reinterpret_cast<const xmlChar *>("HREF"))) continue;
							if (std::strchr(reinterpret_cast<const char *>(attr->children->content), ':')) continue;
							urles_toparse.push_back(
								std::make_pair(curdir, reinterpret_cast<const char *>(attr->children->content)));
							std::size_t sharp = urles_toparse.back().second.find_first_of("#");
							if (sharp != std::string::npos) urles_toparse.back().second = urles_toparse.back().second.substr(0, sharp);
						}
					}
#endif
				}
				if (node->next) nodeStack.push_back(node->next);
				if (node->children) nodeStack.push_back(node->children);
			}
//			printf("%s => \n", content.c_str());
		}
		if (title == "") title = urles_toparse[k].second;
		std::fprintf(stderr, "  title:%s\n", title.c_str());
		rc = sqlite3_reset(stmt_insert);
		rc = sqlite3_bind_text(stmt_insert, 1, url_toparse.c_str(), -1, SQLITE_TRANSIENT);
		rc = sqlite3_bind_text(stmt_insert, 2, title.c_str(), -1, SQLITE_TRANSIENT);
		rc = sqlite3_bind_text(stmt_insert, 3, content.c_str(), -1, SQLITE_TRANSIENT);
		rc = sqlite3_step(stmt_insert);
					
//		printf(" - %s\n", tagger->parse(content.c_str()));
	}

	xmlCleanupParser();
	xmlCleanupCharEncodingHandlers();
	sqlite3_finalize(stmt_insert);
	sqlite3_exec(db, "commit;", 0, 0, &rstr);
	sqlite3_close(db);
}

// Moves pos forward until it no longer points at a UTF-8 continuation byte
// (10xxxxxx), i.e. until it sits on the first byte of a character or on the
// end of the text. A negative pos is treated as 0.
//
// content_str: the text; contents_sz: its length in bytes.
// Returns the adjusted position (never moved backwards, at most contents_sz
// unless pos was already larger).
int getpos_utf8_firstletter(const char *content_str, size_t contents_sz, int pos){
	if (pos < 0) pos = 0;
	while(pos < static_cast<int>(contents_sz) &&
		(static_cast<unsigned char>(content_str[pos]) & 0xC0) == 0x80) ++pos;
	return pos;
}

// Builds an excerpt of content_str with every match wrapped in <b>...</b>.
//
// offsets:      whitespace-separated integers in groups of four
//               (column, term, byte offset, byte length); only groups whose
//               column is 2 are used. A null pointer means "no matches".
// content_str:  the text the offsets refer to (null is treated as empty).
// snippet:      receives the excerpt; "..." is added on each side that was cut.
// snippet_size: nominal excerpt length in bytes. The excerpt starts
//               snippet_size / 4 bytes before the first match; without any
//               match it is the first snippet_size bytes of the text.
// Both ends of the excerpt are moved onto UTF-8 character boundaries and are
// kept inside the text.
void create_snippet(const char *offsets, const char *content_str, std::string &snippet, size_t snippet_size){
	if (!offsets) offsets = "";
	if (!content_str) content_str = "";

	const size_t contents_sz = std::strlen(content_str);
	const int content_len = static_cast<int>(contents_sz);
	const int window = static_cast<int>(snippet_size);
	const int str_column = 2;	// column index the snippet text comes from

	const char *p = offsets, *pe = offsets + std::strlen(offsets);
	std::pair<int, int> candidate(-1, -1);	// [first, second) byte range of the excerpt
	std::vector<std::pair<int, int> > vv;	// (offset, length) of the matches inside it

	// Turn offsets into int groups (groups for other columns are not collected).
	while(p < pe){
		int v[4] = { -1, -1, -1, -1 };
		for (int i = 0; i < 4 && p < pe; ++i){
			char *endp = 0;
			const unsigned long value = std::strtoul(p, &endp, 10);
			// Nothing parsable left: stop instead of spinning on the same position.
			if (endp == p){ p = pe; break; }
			v[i] = static_cast<int>(value);
			p = endp;
		}
		if (v[0] != str_column || (v[1] < 0 || v[2] < 0 || v[3] < 0)) continue;
		// Ignore matches that do not lie inside the text.
		if (v[3] > content_len || v[2] > content_len - v[3]) continue;

		if (candidate.first == -1){
			// First match: open the window a quarter of its size in front of it.
			const int dif = v[2] - window / 4;
			candidate.first = getpos_utf8_firstletter(content_str, contents_sz, (dif >= 0) ? dif : 0);
			candidate.second = candidate.first + window + (dif < 0 ? -dif : 0);
		}else if (v[2] < candidate.second){
			// Match starts inside the window: stretch the window to contain all of it.
			if (candidate.second < v[2] + v[3]) candidate.second = v[2] + v[3];
		}else{
			break;
		}
		vv.push_back(std::make_pair(v[2], v[3]));
	}

	// No match at all: excerpt the beginning of the text.
	if (candidate.first < 0){
		candidate.first = 0;
		candidate.second = static_cast<int>(std::min(contents_sz, snippet_size));
	}
	// Never read past the end of the text, and end on a character boundary.
	if (candidate.second > content_len) candidate.second = content_len;
	candidate.second = getpos_utf8_firstletter(content_str, contents_sz, candidate.second);

	std::sort(vv.begin(), vv.end());

	// Assemble the snippet.
	snippet = (candidate.first == 0) ? "" : "...";

	int cur_pos = candidate.first;
	for (size_t i = 0; i < vv.size(); ++i){
		// A match overlapping text that was already emitted is not repeated.
		if (vv[i].first < cur_pos) continue;
		if (vv[i].first > cur_pos) snippet.append(content_str + cur_pos, vv[i].first - cur_pos);
		snippet += "<b>";
		snippet.append(content_str + vv[i].first, vv[i].second);
		snippet += "</b>";
		cur_pos = vv[i].first + vv[i].second;
	}
	if (candidate.second > cur_pos) snippet.append(content_str + cur_pos, candidate.second - cur_pos);
	if (candidate.second < content_len) snippet += "...";
}

// Converts UTF-8 text into ASCII-only HTML: multi-byte characters become
// numeric character references (&#x....;). When escape_markup is true the
// characters & < > " are replaced by entities as well. Text that is not valid
// UTF-8 is passed through byte by byte (still escaped when requested).
// A null pointer yields an empty string.
static std::string query_html_encode(const char *text, bool escape_markup){
	std::string out;
	if (!text) return out;

	const char *end = text + std::strlen(text);
	bool valid_utf8 = true;
	int len = 0;
	for (const char *iter = text; *iter; iter += len){
		len = static_cast<int>(end - iter);
		const int code = xmlGetUTF8Char(reinterpret_cast<const unsigned char *>(iter), &len);
		if (code == -1){
			valid_utf8 = false;
			break;
		}
		if (len != 1){
			// Large enough for any code point ("&#x10FFFF;" needs 11 bytes with the terminator).
			char buf[16];
			std::sprintf(buf, "&#x%04X;", static_cast<unsigned int>(code));
			out += buf;
			continue;
		}
		if (escape_markup && *iter == '&') out += "&amp;";
		else if (escape_markup && *iter == '<') out += "&lt;";
		else if (escape_markup && *iter == '>') out += "&gt;";
		else if (escape_markup && *iter == '"') out += "&quot;";
		else out += *iter;
	}
	if (valid_utf8) return out;

	// Fallback for undecodable input: emit the original bytes.
	out.clear();
	for (const char *raw = text; *raw; ++raw){
		if (escape_markup && *raw == '&') out += "&amp;";
		else if (escape_markup && *raw == '<') out += "&lt;";
		else if (escape_markup && *raw == '>') out += "&gt;";
		else if (escape_markup && *raw == '"') out += "&quot;";
		else out += *raw;
	}
	return out;
}

// Runs a full-text query against the table "tbl" of db_name and prints one
// HTML <li> per hit to stdout: a link to the page titled with the page title,
// followed (unless SNIPPET_SIZE is 0) by a snippet with the matches in bold.
//
// db_name:   SQLite database file, opened read-only.
// query_str: FTS3 MATCH expression.
// Returns SQLITE_OK on success, otherwise the result code of the first
// failing SQLite call. The statement and the connection are released on
// every path.
int query(char *db_name, const char *query_str){
	sqlite3 *db = 0;
	int rc = sqlite3_open_v2(db_name, &db, SQLITE_OPEN_READONLY, 0);
	if (rc == SQLITE_OK) rc = register_mecab(db);
	if (rc != SQLITE_OK){
		sqlite3_close(db);
		return rc;
	}

	sqlite3_stmt *stmt_query = 0;
	rc = sqlite3_prepare_v2(db, "SELECT url, title, str, offsets(tbl) from tbl where tbl match ?", -1, &stmt_query, 0);
	if (rc == SQLITE_OK){
		rc = sqlite3_bind_text(stmt_query, 1, query_str, static_cast<int>(std::strlen(query_str)), SQLITE_TRANSIENT);
	}
	if (rc != SQLITE_OK){
		sqlite3_finalize(stmt_query);
		sqlite3_close(db);
		return rc;
	}

	while(sqlite3_step(stmt_query) == SQLITE_ROW){
		std::string snippet;
		create_snippet(reinterpret_cast<const char *>(sqlite3_column_text(stmt_query, 3)),
			reinterpret_cast<const char *>(sqlite3_column_text(stmt_query, 2)), snippet, SNIPPET_SIZE);

		// URL and title come from crawled pages, so they are escaped before
		// being placed into the markup.
		const std::string url_enc = query_html_encode(
			reinterpret_cast<const char *>(sqlite3_column_text(stmt_query, 0)), true);
		const std::string title_enc = query_html_encode(
			reinterpret_cast<const char *>(sqlite3_column_text(stmt_query, 1)), true);
		// NOTE(review): the snippet already carries the <b> markup added by
		// create_snippet(), so it cannot be escaped here; the page text inside
		// it is emitted verbatim.
		const std::string snippet_enc = query_html_encode(snippet.c_str(), false);

#if SNIPPET_SIZE == 0
		printf("<li><a href=\"%s\" target=frame>%s</a></li>\n", url_enc.c_str(), title_enc.c_str());
#else
		printf("<li><a href=\"%s\" target=frame>%s</a><br>\n", url_enc.c_str(), title_enc.c_str());
		printf("%s</li>\n", snippet_enc.c_str());
#endif
	}

	// sqlite3_finalize reports the error of a failed step, if there was one.
	rc = sqlite3_finalize(stmt_query);
	const int close_rc = sqlite3_close(db);
	return (rc != SQLITE_OK) ? rc : close_rc;
}

// (argc, argv) pair handed from main() to the worker thread.
typedef std::pair<int, char **> args_type;

// True while the elapsed-time line should be printed as HTML on stdout;
// the database-building commands switch it off so that it goes to stderr.
bool mtime_htmlout = true;

// Worker thread entry point: runs the command named by argv[1] inside its own
// V8 isolate.
//
//   create_db <start-url> <db>               crawl from a start page
//   create_db_from_urllist <list-file> <db>  index the entries of a list file
//   query <db> <term>...                     search; terms are URI-unescaped
//                                            and joined with single spaces
//
// args_: pointer to an args_type. Always returns 0; commands with fewer than
// four arguments are ignored.
unsigned int __stdcall ThreadProc(void *args_){
	// Creates and enters an isolate for the lifetime of this function.
	struct RAII{
		v8::Isolate* isolate;
		RAII(){
			isolate = v8::Isolate::New();
			isolate->Enter();
		}
		~RAII(){
			isolate->Exit();
			isolate->Dispose();
		}
	}raii;

	args_type *args = static_cast<args_type *>(args_);
	const int argc = args->first;
	char **argv = args->second;
	if (argc < 2) return 0;

	if (std::strcmp(argv[1], "create_db") == 0){
		mtime_htmlout = false;
		if (argc < 4) return 0;
		parse_and_create_db(argv[2], argv[3]);
	}else if (std::strcmp(argv[1], "create_db_from_urllist") == 0){
		mtime_htmlout = false;
		if (argc < 4) return 0;
		parse_and_create_db_from_urllist(argv[2], argv[3]);
	}else if (std::strcmp(argv[1], "query") == 0){
		if (argc < 4) return 0;
		std::string query_str;
		for (int i = 3; i < argc; ++i){
			char *unescaped = xmlURIUnescapeString(argv[i], 0, 0);
			if (unescaped){
				query_str += unescaped;
				// The string comes from libxml2's allocator, so it goes back through xmlFree.
				xmlFree(unescaped);
			}else{
				// Unescaping failed: fall back to the argument as given.
				query_str += argv[i];
			}

			if (i < argc - 1) query_str += " ";
		}
		query(argv[2], query_str.c_str());
	}
	return 0;
}

int main(int argc, char *argv[]){
	if (argc < 4){
		return 0;
	}
	LARGE_INTEGER c1, c2, freq;
	::QueryPerformanceFrequency(&freq);
	::QueryPerformanceCounter(&c1);

	args_type args(argc, argv);

	unsigned int tid;
	HANDLE hThread = reinterpret_cast<HANDLE>(_beginthreadex(0, 0, ThreadProc, &args, 0, &tid));
	DWORD rc = ::WaitForSingleObject(hThread, INFINITE);

	::QueryPerformanceCounter(&c2);
	double mtime = static_cast<double>(c2.QuadPart - c1.QuadPart) / static_cast<double>(freq.QuadPart);
	if (mtime_htmlout){
		if (rc == WAIT_OBJECT_0){
			std::printf("<br><br><span> %.5f sec </span>\n", mtime);
		}else{
			std::printf("<br><br><span> timeout (%.5f sec) </span>\n", mtime);
		}
	}else{
		if (rc == WAIT_OBJECT_0){
			std::fprintf(stderr, "%.5f sec\n", mtime);
		}else{
			std::fprintf(stderr, "timeout (%.5f sec)\n", mtime);
		}
	}

	return 0;
}
