#!perl
use strict;
use warnings;
use Encode;

# Convert a string to lowercase hexadecimal, two digits per byte.
#
# Argument: a byte string, or a character string (UTF-8 flag set), which is
#           first encoded to UTF-8 so that its bytes are what gets dumped.
# Returns:  the hex dump, e.g. "AB" -> "4142".
sub hexBytes{
	# Not named $a: a lexical $a would mask the package variable used by sort.
	my($bytes)=@_;
	if(utf8::is_utf8($bytes)){ $bytes = Encode::encode('utf8',$bytes); }
	# 'H*' hex-encodes the whole string in one call, high nybble first.
	return unpack('H*',$bytes);
}

# Convert a 2-byte Shift_JIS character to a JIS-style 2-byte code.
#
# Argument: byte string; its first two bytes are used as lead and trail byte.
# Returns:  2-byte string (row + 0x20, cell + 0x20), where row and cell are
#           the 1-based kuten numbers computed from the Shift_JIS bytes.
sub sjis_to_kuten{
	my($hi,$lo) = unpack('CC',$_[0]);

	# Lead bytes above 0x9F are rebased by a further 0x40 so that both lead-byte
	# runs number their row pairs consecutively from 0.
	my $pair = $hi - ($hi > 0x9f ? 0x81 + 0x40 : 0x81);

	# Trail bytes start at 0x40; values above 0x7E are shifted down by one.
	my $cell = $lo - ($lo > 0x7E ? 0x40 : 0x3F);

	# Each lead byte covers two rows: cells 1-94 belong to the odd row, the
	# remainder to the following even row.
	my $row = ($pair << 1) + 1;
	if($cell > 94){
		$row  += 1;
		$cell -= 94;
	}

	return pack('CC',$row+0x20,$cell+0x20);
}

# Tell whether a code point is usable as a Unicode character.
#
# Argument: integer code point.
# Returns:  1 if usable, 0 otherwise.
#
# Rejected values:
#   - anything outside 0 .. 0x10FFFF,
#   - UTF-16 surrogates U+D800 .. U+DFFF,
#   - noncharacters: U+FDD0 .. U+FDEF and the last two code points of every
#     plane (U+FFFE, U+FFFF, U+1FFFE, U+1FFFF, ...).
sub isValidUnicode($){
	my $i = shift;
	return 0 if $i < 0 || $i > 0x10FFFF;
	return 0 if $i>=0xD800 && $i <= 0xDFFF;
	return 0 if $i>=0xfdd0 && $i <= 0xfdef;
	# Low 16 bits of 0xFFFE or 0xFFFF: plane-final noncharacters.
	return 0 if ($i & 0xFFFE) == 0xFFFE;

	return 1;
}


#############################################################
# Building the JIS X 0208 tables

# cp932 and similar encodings also use rows 95-120, which do not exist in the
# JIS kuten numbering, so the row/cell is computed from the Shift_JIS bytes
# with the conversion formula (sjis_to_kuten).

my %x208encode;	# Unicode code point => 2-byte JIS code
my %x208decode;	# 2-byte JIS code => string of all Unicode characters mapped to it

# Scan U+0080 .. U+FFFE for characters that cp932 encodes as two bytes.
for my $i (0x80 .. 0xFFFE){
	next if not isValidUnicode($i);
	my $u = chr($i);

	# Only double-byte results are wanted. This single test also rejects the
	# 1-byte results ('?', space, double quote) that used to be filtered
	# separately before it.
	my $cp932 = Encode::encode('cp932',$u);
	next if length($cp932)!=2;

	my $x208b = sjis_to_kuten($cp932);

	# Cross-check against Encode's own jis0208-raw table. A result equal to a
	# single space is treated as "no jis0208-raw mapping".
	# NOTE(review): confirm that this is what Encode returns for unmappable input.
	my $x208 = Encode::encode('jis0208-raw',$u);
	my $has_x208 = $x208 ne " ";

	# Drop characters whose computed code disagrees with jis0208-raw.
	next if $has_x208 and $x208b ne $x208;

	$x208encode{$i}=$x208b;

	# Several Unicode characters may share one JIS code; keep them all, in
	# ascending code point order.
	$x208decode{$x208b} .= $u;
}

# Fudge (approximate) certain characters at encode time.
# Placeholder: the here-document below is empty, so split returns no lines and
# the loop body never runs.
# NOTE(review): presumably meant to hold one substitution rule per line — confirm.
for(split/\n/,<<'END')
END
{
	
}
# Write the JIS X 0208 encode table (Unicode -> JIS) to x208encode.map.
#
# File layout:
#   - index: 256 big-endian 16-bit values, one per Unicode high byte, holding
#     the file offset of that page's record, or 0xFFFF if the page is empty;
#   - records: one byte with (entry count - 1), then 3-byte entries
#     (Unicode low byte, 2-byte JIS code) in ascending low-byte order.
{
	warn "x208encode entry count=",scalar(keys %x208encode),"\n";
	my $path = 'x208encode.map';
	open(my $out,'>',$path) or die "cannot open $path for writing: $!";
	binmode $out;

	# Reserve room for the index; it is filled in once all offsets are known.
	print {$out} ' ' x (256*2);

	my @start;
	for my $i (0 .. 255){
		my $buf = "";
		for my $j (0 .. 255){
			my $x208b = $x208encode{$i*256+$j};
			next if not $x208b;
			$buf .= pack('C',$j).$x208b;
		}
		if(not length $buf){
			push @start,0xFFFF;
		}else{
			my $offset = tell $out;
			# Offsets are stored in 16 bits and 0xFFFF is the "empty page" marker.
			die "$path: record offset $offset does not fit the 16-bit index"
				if $offset < 0 or $offset >= 0xFFFF;
			push @start,$offset;
			print {$out} pack('C',(length($buf)/3)-1).$buf;
		}
	}

	# Go back and overwrite the reserved area with the real index.
	seek($out,0,0) or die "cannot seek in $path: $!";
	print {$out} pack('n*',@start);
	close($out) or die "cannot close $path: $!";
}

# Write the JIS X 0208 decode table (JIS -> Unicode) to x208decode.map.
#
# File layout:
#   - index: 256 big-endian 16-bit values, one per JIS high byte, holding the
#     file offset of that page's record, or 0xFFFF if the page is empty;
#   - records: one byte with (entry count - 1), then 3-byte entries
#     (JIS low byte, big-endian 16-bit Unicode code point) in ascending order.
{
	# Overrides: these JIS codes always decode to the character given here
	# instead of the first character collected in %x208decode.
	my %force=(
		0x2124=>Encode::decode('utf-8','，'),
		0x2126=>Encode::decode('utf-8','・'),
		0x2131=>Encode::decode('utf-8','￣'),
		0x2171=>Encode::decode('utf-8','￠'),
		0x2172=>Encode::decode('utf-8','￡'),
		0x224c=>Encode::decode('utf-8','￢'),
		0x2263=>Encode::decode('utf-8','≪'),
		0x2264=>Encode::decode('utf-8','≫'),
		0x2574=>Encode::decode('utf-8','ヴ'),
		0x264c=>Encode::decode('utf-8','μ'),
	);

	# Set to 1 to list every JIS code that has more than one Unicode candidate.
	my $dump_candidates = 0;

	warn "x208decode entry count=",scalar(keys %x208decode),"\n";
	my $path = 'x208decode.map';
	open(my $out,'>',$path) or die "cannot open $path for writing: $!";
	binmode $out;

	# Reserve room for the index; it is filled in once all offsets are known.
	print {$out} ' ' x (256*2);
	my @start;

	for my $i (0 .. 255){
		my $buf = "";
		for my $j (0 .. 255){
			my $n = $i*256+$j;
			my $unicodes = $force{$n} || $x208decode{pack('n',$n)};
			next if not $unicodes;
			if($dump_candidates and length($unicodes)>1){
				printf "kuten=[%x] [%s] unicodes=",$n,Encode::encode('utf8',$unicodes);
				for(split //,$unicodes){
					printf "%x ",ord($_);
				}
				print "\n";
			}
			# Only the first candidate is stored.
			my $code = ord($unicodes);
			# pack 'n' keeps 16 bits only, so check the code point being
			# stored (the check used to test the JIS code $n instead).
			warn "bad unicode $n\n" if $code > 0xffff;
			$buf .= pack('Cn',$j,$code);
		}
		if(not length $buf){
			push @start,0xFFFF;
		}else{
			my $offset = tell $out;
			# Offsets are stored in 16 bits and 0xFFFF is the "empty page" marker.
			die "$path: record offset $offset does not fit the 16-bit index"
				if $offset < 0 or $offset >= 0xFFFF;
			push @start,$offset;
			print {$out} pack('C',(length($buf)/3)-1).$buf;
		}
	}

	# Go back and overwrite the reserved area with the real index.
	seek($out,0,0) or die "cannot seek in $path: $!";
	print {$out} pack('n*',@start);
	close($out) or die "cannot close $path: $!";
}

################################################

my %x212encode;	# Unicode code point => jis0212-raw bytes
my %x212decode;	# jis0212-raw bytes => string of all Unicode characters mapped to them

# Scan U+0080 .. U+FFFE for characters that jis0212-raw can encode.
for my $i (0x80 .. 0xFFFE){
	next if not isValidUnicode($i);

	# Characters already covered by the x208 table are not examined.
	next if $x208encode{$i};

	my $u = chr($i);

	# Encode to JIS X 0212 and skip the results that are treated as "no mapping".
	my $x212 = Encode::encode('jis0212-raw',$u);
	next if $x212 eq '?' or $x212 eq "\x20" or $x212 eq "\x22";

	$x212encode{$i}=$x212;

	# Several Unicode characters may share one code; keep them all, in
	# ascending code point order.
	$x212decode{$x212} .= $u;
}

# Write the JIS X 0212 encode table (Unicode -> JIS X 0212) to x212encode.map.
#
# File layout:
#   - index: 256 big-endian 16-bit values, one per Unicode high byte, holding
#     the file offset of that page's record, or 0xFFFF if the page is empty;
#   - records: one byte with (entry count - 1), then entries made of the
#     Unicode low byte followed by the jis0212-raw bytes, in ascending order.
{
	warn "x212encode entry count=",scalar(keys %x212encode),"\n";
	my $path = 'x212encode.map';
	open(my $out,'>',$path) or die "cannot open $path for writing: $!";
	binmode $out;

	# Reserve room for the index; it is filled in once all offsets are known.
	print {$out} ' ' x (256*2);

	my @start;
	for my $i (0 .. 255){
		my $buf = "";
		for my $j (0 .. 255){
			my $x212 = $x212encode{$i*256+$j};
			next if not $x212;
			$buf .= pack('C',$j).$x212;
		}
		if(not length $buf){
			push @start,0xFFFF;
		}else{
			my $offset = tell $out;
			# Offsets are stored in 16 bits and 0xFFFF is the "empty page" marker.
			die "$path: record offset $offset does not fit the 16-bit index"
				if $offset < 0 or $offset >= 0xFFFF;
			push @start,$offset;
			print {$out} pack('C',(length($buf)/3)-1).$buf;
		}
	}

	# Go back and overwrite the reserved area with the real index.
	seek($out,0,0) or die "cannot seek in $path: $!";
	print {$out} pack('n*',@start);
	close($out) or die "cannot close $path: $!";
}

# Write the JIS X 0212 decode table (JIS X 0212 -> Unicode) to x212decode.map.
#
# File layout:
#   - index: 256 big-endian 16-bit values, one per code high byte, holding the
#     file offset of that page's record, or 0xFFFF if the page is empty;
#   - records: one byte with (entry count - 1), then 3-byte entries
#     (code low byte, big-endian 16-bit Unicode code point) in ascending order.
{
	# Overrides (code => character) that take priority over %x212decode.
	# Currently empty.
	my %force=(
#		0x2124=>Encode::decode('utf-8','，'),
	);

	# Set to 1 to list every code that has more than one Unicode candidate.
	my $dump_candidates = 0;

	warn "x212decode entry count=",scalar(keys %x212decode),"\n";
	my $path = 'x212decode.map';
	open(my $out,'>',$path) or die "cannot open $path for writing: $!";
	binmode $out;

	# Reserve room for the index; it is filled in once all offsets are known.
	print {$out} ' ' x (256*2);
	my @start;

	for my $i (0 .. 255){
		my $buf = "";
		for my $j (0 .. 255){
			my $n = $i*256+$j;
			my $unicodes = $force{$n} || $x212decode{pack('n',$n)};
			next if not $unicodes;
			if($dump_candidates and length($unicodes)>1){
				printf "x212=[%x] [%s] unicodes=",$n,Encode::encode('utf8',$unicodes);
				for(split //,$unicodes){
					printf "%x ",ord($_);
				}
				print "\n";
			}
			# Only the first candidate is stored.
			my $code = ord($unicodes);
			# pack 'n' keeps 16 bits only, so check the code point being
			# stored (the check used to test the code $n instead).
			warn "bad unicode $n\n" if $code > 0xffff;
			$buf .= pack('Cn',$j,$code);
		}
		if(not length $buf){
			push @start,0xFFFF;
		}else{
			my $offset = tell $out;
			# Offsets are stored in 16 bits and 0xFFFF is the "empty page" marker.
			die "$path: record offset $offset does not fit the 16-bit index"
				if $offset < 0 or $offset >= 0xFFFF;
			push @start,$offset;
			print {$out} pack('C',(length($buf)/3)-1).$buf;
		}
	}

	# Go back and overwrite the reserved area with the real index.
	seek($out,0,0) or die "cannot seek in $path: $!";
	print {$out} pack('n*',@start);
	close($out) or die "cannot close $path: $!";
}

###########################################################3
# X 0201 katakana

# Unicode (0xff61-0xff9f) <=> x201 kana (0xa1-0xdf) is a simple linear mapping.
# A map is needed when converting from Unicode to full-width kana.

my %x201encode;	# (Unicode - 0xFF61) => [ 7-bit x201 code, low byte of the full-width code ]
my %x201decode;	# 7-bit x201 code => (Unicode - 0xFF61)

# Read the mapping source; lines that do not match the pattern are ignored.
{
	my $src = 'src-201kana.txt';
	open(my $in,'<',$src) or die "cannot open $src for reading: $!";
	while(my $line = <$in>){
		next unless $line =~ /unicode=(\w+) \S+ cp932=(\w+) \S+ kuten=\[(\w+)\]/;
		my($u,$h,$z)=map{hex $_} ($1,$2,$3);
		$h &= 0x7f; # reduce to 7 bits
		$u -= 0xFF61; # the actual range is ff61-ff9f
		$z &= 0xff; # keep the low byte only; per the original note the high byte is always 0x30
		$x201encode{$u}=[$h,$z];
		$x201decode{$h}=$u;
	}
	close($in) or die "cannot close $src: $!";
}
warn "x201encode has ",scalar(keys %x201encode)," entry.\n";
warn "x201decode has ",scalar(keys %x201decode)," entry.\n";

# Write x201u2z.map: 64 bytes indexed by (Unicode - 0xFF61), each holding the
# low byte of the full-width code, or 0xff where there is no mapping.
{
	my $dst = 'x201u2z.map';
	open(my $out,'>',$dst) or die "cannot open $dst for writing: $!";
	binmode $out;
	for my $i (0 .. 63){
		my $pair=$x201encode{$i};
		print {$out} pack('C',$pair ? $pair->[1] : 0xff);
	}
	close($out) or die "cannot close $dst: $!";
}




##################################################################
# JIS X 0213

# Not supported: the author stopped here, so no JIS X 0213 tables are generated.


