#!/usr/bin/perl -CDS

# CJKVI-IDS to IDSgrep EIDS translator
# Copyright (C) 2013, 2014  Matthew Skala
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Matthew Skala
# http://ansuz.sooke.bc.ca/
# mskala@ansuz.sooke.bc.ca

use utf8;

$svnid='$Id: cjkvi2eids 3040 2014-04-27 01:56:00Z mskala $';

print "〖EIDS kanji decomposition dictionary generated by cjkvi2eids\n"
     ."　$svnid〗;\n";

$style=uc(shift);

while (<>) {
  next if /^\s*;/; # now a slash to keep my syntax highlighter happy: /
  chomp;
  s/&([A-Z\-+_0-9]+);/<$1>;/gi;
  s/\[U\]/\[X\]/g;
  @fields=split("\t",$_);
  shift @fields;
  $head=shift @fields;
  $entry{$head}=join("\t",@fields);
}

foreach $head (keys %entry) {
  @fields=split("\t",$entry{$head});
  $expansion='';
  foreach $field (@fields) {
    if ($field=~/^(.*)\[[GTKJVXH]*$style[GTKJVXH]*\]$/) {
      $expansion=$1;
      last;
    }
  }
  if ($expansion eq '') {
    if ($fields[0]=~/^(.*)\[[GTKJVXH]+\]$/) {
      $expansion=($style eq 'A')?$1:$head;
    } else {
      $expansion=$fields[0];
    }
  }
  $expand{$head}=$expansion if $expansion ne $head;
}

sub write_entry {
  my($head,$exp)=(@_);

  while (($exp=~/^(.|<[^>]+>;)$/) && ($expand{$exp} ne '')) {
    return if $head eq $exp;
    $exp=$expand{$exp};
  }
  return if $exp=~/^(.|<[^>]+>;)$/;
  return if $exp eq '';
  if ($head=~/^<(.*)>;$/) {
    print "【$1】";
  } else {
    print "【$head】";
  }
  $todo=$exp;
  while ($todo ne '') {
    if ($todo=~/^<([^>]+)>;(.*)$/) {
      $ch=$1;
      $tl=$2;
      if ($expand{"<$ch>;"} ne '') {
        $xp=$expand{"<$ch>;"};
        while (($xp=~/^(.|<[^>]+>;)$/) && ($expand{$xp} ne '')) {
          $xp=$expand{$xp};
        }
        if ($xp=~/^(.|<[^>]+>;)$/) {
          print $xp;
          $todo=$tl;
        } else {
          print "<$ch>";
          $todo=$xp.$tl;
        }
      } else {
        print "<$ch>;";
        $todo=$tl;
      }
    } else {
      $todo=~/^(.)(.*)$/;
      $ch=$1;
      $tl=$2;
      if ($expand{$ch} ne '') {
        $xp=$expand{$ch};
        while (($xp=~/^(.|<[^>]+>;)$/) && ($expand{$xp} ne '')) {
          $xp=$expand{$xp};
        }
        if ($xp=~/^(.|<[^>]+>;)$/) {
          print $xp;
          $todo=$tl;
        } else {
          print "<$ch>";
          $todo=$xp.$tl;
        }
      } else {
        print "$ch";
        $todo=$tl;
      }
    }
  }
  print "\n";
}

if ($style eq 'A') {
  foreach $head (sort keys %entry) {
    @fields=split("\t",$entry{$head});
    foreach $field (@fields) {
      if ($field=~/^(.*)\[[GTKJVXH]+\]$/) {
        &write_entry($head,$1);
      } else {
        &write_entry($head,$field);
      }
    }
  }
} else {
  foreach $head (sort keys %expand) {
    &write_entry($head,$expand{$head});
  }
}

