#!/usr/bin/env perl

# create ltj-unicode-ccfix.lua (Lua code, written to standard output) by processing Unicode data file
#	LineBreak.txt

# modified from unicode-char-prep.pl (part of the XeTeX typesetting system).
# original copyright is as follows:
#
# /****************************************************************************\
#  Part of the XeTeX typesetting system
#  Copyright (c) 1994-2008 by SIL International
#  Copyright (c) 2009 by Jonathan Kew
# 
#  SIL Author(s): Jonathan Kew
# 
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
# 
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE
# FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# 
# Except as contained in this notice, the name of the copyright holders
# shall not be used in advertising or otherwise to promote the sale,
# use or other dealings in this Software without prior written
# authorization from the copyright holders.
# \****************************************************************************/

# Require exactly one command-line argument: the path to LineBreak.txt.
# The generated Lua code goes to standard output, so the usage line shows
# the caller redirecting it into the target file.
die "usage: perl $0 LineBreak.txt > ltj-unicode-ccfix.lua\n"
	unless @ARGV == 1;

# Record the case mappings and the coarse category of one character record.
#
# Arguments: the fields of one record, passed as a flat list.  Only fields
# 0, 2, 12 and 13 are read: field 0 is used as the key, field 2 is tested
# for a leading "L" or "M", field 12 is stored in %uccode and field 13 in
# %lccode.
# NOTE(review): this field layout looks like that of UnicodeData.txt (code
# point, general category, uppercase mapping, lowercase mapping) -- confirm.
# Nothing in the visible part of this script calls this sub.
#
# Side effects: updates the package variables %lccode, %uccode, @letters,
# @marks and @casesym.
sub parse_unidata {
	our (%lccode, %uccode, @letters, @marks, @casesym);

	my @field = @_;
	my ($code, $category, $upper, $lower) = @field[0, 2, 12, 13];
	my $is_letter = $category =~ /^L/;

	# Lowercase mapping: an explicit one wins; otherwise letters and
	# characters that have an uppercase mapping map to themselves.
	if ($lower ne '') {
		$lccode{$code} = $lower;
	}
	elsif ($is_letter or $upper ne '') {
		$lccode{$code} = $code;
	}

	# Uppercase mapping: mirror image of the rule above.
	if ($upper ne '') {
		$uccode{$code} = $upper;
	}
	elsif ($is_letter or $lower ne '') {
		$uccode{$code} = $code;
	}

	# File the character under exactly one list; anything that is neither
	# a letter nor a mark is listed only when it received a case mapping.
	if ($is_letter) {
		push @letters, $code;
	}
	elsif ($category =~ /^M/) {
		push @marks, $code;
	}
	elsif (exists $lccode{$code} or exists $uccode{$code}) {
		push @casesym, $code;
	}
}


# Timestamp for the generated-file banner.  Taken from Perl's own clock
# instead of an external `date` command, so the script does not depend on
# that utility being installed.
my $date = scalar localtime;

# Banner: Lua comments marking the output as a generated file.
print <<"__EOT__";
-- Do not edit this file!
-- Created from LineBreak.txt by ltj-unicode-ccfix_make.pl on $date.
-- In case of errors, fix the Perl script instead.
__EOT__

# Line-break classes that are processed, each mapped to an action code.
# Classes that are not listed here produce no output.
%lineBreakClass = (
	'ID' => 1,		# ideograph
);


# Opening part of the generated Lua code: a guard on the catcode of U+6F22
# and the set_letter() helper that the per-range output lines call.  The
# matching "end" of the guard is printed after all ranges are written.
print <<'__EOT__';
if tex.getcatcode(0x6F22)==12 then
  local tex_catcode = tex.setcatcode
  local function set_letter(b,e)
    for i=b,e do tex_catcode('global', i, 11) end
  end

__EOT__

# Read LineBreak.txt and print one set_letter() call for every code point
# (or code point range) whose line-break class is listed in %lineBreakClass.
#
# Three-argument open with a lexical handle: the file name is always treated
# as a plain path, never as a mode or pipe specification.
open my $linebreak_fh, '<', $ARGV[0]
	or die "can't read $ARGV[0]: $!";
while (my $line = <$linebreak_fh>) {
	chomp $line;
	$line =~ s/ *#.*//;	# drop the trailing comment, if any
	$line =~ s/ +$//;	# drop trailing blanks

	# Data lines have the form "XXXX;CLASS" or "XXXX..YYYY;CLASS".  The
	# pattern is anchored at the start of the line and captures the whole
	# class name, so a longer class can never be looked up by its first
	# two characters.  Optional blanks around ';' are accepted as well.
	next unless $line =~ m/^\s*([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))?\s*;\s*(\w+)/;
	my ($s, $lb) = ($1, $3);
	my $e = defined $2 ? $2 : $s;	# single code point: range of one

	next unless exists $lineBreakClass{$lb};
	if ($lineBreakClass{$lb} == 1) {
		# action 1 (ideographs): emit a set_letter() call that covers
		# the whole range
		print "  set_letter(0x$s,0x$e)\n";
	}
}
close $linebreak_fh;

# Closing part of the generated Lua code: the "end" that terminates the
# "if ... then" opened before the per-range lines.
print <<'__EOT__';

end

__EOT__
