#!/usr/bin/perl -CDS

# CJKVI-IDS to IDSgrep EIDS translator
# Copyright (C) 2013, 2014  Matthew Skala
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Matthew Skala
# http://ansuz.sooke.bc.ca/
# mskala@ansuz.sooke.bc.ca

use utf8;

$svnid='$Id: cjkvi2eids 3405 2014-10-21 18:12:59Z mskala $';

print "〖EIDS kanji decomposition dictionary generated by cjkvi2eids\n"
     ."　$svnid\n  Copyright notice below is from the input file.〗;\n";

$style=uc(shift);
$copyright='';

while (<>) {
  chomp;
  if (/^# (.*)/) {
    $copyright.='  ' if $copyright ne '';
    $copyright.="$1\n";
  }
  next if /^\s*;/; # now a slash to keep my syntax highlighter happy: /
  s/&([A-Z\-+_0-9]+);/<$1>;/gi;
  @fields=split("\t",$_);
  shift @fields;
  $head=shift @fields;
  $entry{$head}=join("\t",@fields);
}

foreach $head (keys %entry) {
  @fields=split("\t",$entry{$head});
  $expansion='';
  foreach $field (@fields) {
    if ($field=~/^(.*)\[[GTKJVXHAU]*$style[GTKJVXHAU]*\]$/) {
      $expansion=$1;
      last;
    }
  }
  if ($expansion eq '') {
    if ($fields[0]=~/^(.*)\[[GTKJVXHAU]+\]$/) {
      $expansion=$head;
    } else {
      $expansion=$fields[0];
    }
  }
  $expand{$head}=$expansion if $expansion ne $head;
}

sub write_entry {
  my($head,$exp)=(@_);

  while (($exp=~/^(.|<[^>]+>;)$/) && ($expand{$exp} ne '')) {
    return if $head eq $exp;
    $exp=$expand{$exp};
  }
  return if $exp=~/^(.|<[^>]+>;)$/;
  return if $exp eq '';
  if ($head=~/^<(.*)>;$/) {
    print "【$1】";
  } else {
    print "【$head】";
  }
  $todo=$exp;
  while ($todo ne '') {
    if ($todo=~/^<([^>]+)>;(.*)$/) {
      $ch=$1;
      $tl=$2;
      if ($expand{"<$ch>;"} ne '') {
        $xp=$expand{"<$ch>;"};
        while (($xp=~/^(.|<[^>]+>;)$/) && ($expand{$xp} ne '')) {
          $xp=$expand{$xp};
        }
        if ($xp=~/^(.|<[^>]+>;)$/) {
          print $xp;
          $todo=$tl;
        } else {
          print "<$ch>";
          $todo=$xp.$tl;
        }
      } else {
        print "<$ch>;";
        $todo=$tl;
      }
    } else {
      $todo=~/^(.)(.*)$/;
      $ch=$1;
      $tl=$2;
      if ($expand{$ch} ne '') {
        $xp=$expand{$ch};
        while (($xp=~/^(.|<[^>]+>;)$/) && ($expand{$xp} ne '')) {
          $xp=$expand{$xp};
        }
        if ($xp=~/^(.|<[^>]+>;)$/) {
          print $xp;
          $todo=$tl;
        } else {
          print "<$ch>";
          $todo=$xp.$tl;
        }
      } else {
        print "$ch";
        $todo=$tl;
      }
    }
  }
  print "\n";
}

chomp $copyright;
print "〖$copyright〗;\n";
foreach $head (sort keys %expand) {
  &write_entry($head,$expand{$head});
}

