#!/bin/bash

# Upstream versions this build is pinned to.
# MOZCVER      - mozc release; selects the Debian source tarball and its directory.
# NEOLOGDDATE  - date stamp of the NEologd seed file to download.
# REVISION     - revision suffix appended to the generated file names.
MOZCVER='2.23.2815.102'
NEOLOGDDATE='20200514'
REVISION='1'

# Download location of the xz-compressed NEologd seed for NEOLOGDDATE.
NEOLOGDURL="https://github.com/neologd/mecab-ipadic-neologd/raw/master/seed/mecab-user-dict-seed.${NEOLOGDDATE}.csv.xz"


# ==============================================================================
# remove tmp files
# ==============================================================================

# Previously generated dictionaries in the parent directory.
rm -f ../mozcdic-*.txt

# Extracted mozc source tree of the pinned version.
rm -rf "mozc-${MOZCVER}+dfsg/"

# Intermediate files in the working directory.
rm -f id.def mecab-user-dict-seed.* mozcdic.txt *.costs*

# Downloaded and derived postal data.
rm -f chimei/*.CSV* chimei/*.zip

# Decompressed Wikipedia title lists and their derivatives.
rm -f wikipedia/jawiki*ns0 wikipedia/jawiki*ns0.modified*


# ==============================================================================
# get official mozc
# ==============================================================================

# get official mozc from Debian
# -N only re-downloads when the remote file is newer than the local copy.
# wget's status is deliberately not checked: an already-present tarball is
# still usable, and a missing or truncated one makes tar fail just below.
echo "get official mozc from Debian..."
mozc_tarball="mozc_${MOZCVER}+dfsg.orig.tar.xz"
wget -N "ftp.jp.debian.org/debian/pool/main/m/mozc/${mozc_tarball}"

# extract official mozc
# Everything after this point reads from the extracted tree, so stop here if
# extraction fails instead of continuing with missing inputs.
echo "extract official mozc..."
tar xf "${mozc_tarball}" || {
  echo "error: cannot extract ${mozc_tarball}" >&2
  exit 1
}

# get hinsi (part-of-speech) ID definitions
cp "mozc-${MOZCVER}+dfsg/src/data/dictionary_oss/id.def" . || exit 1


# ==============================================================================
# get jawiki-latest-all-titles
# ==============================================================================

# get jawiki-latest-all-titles
echo "get jawiki-latest-all-titles..."
# Abort if the directory is missing; otherwise the download and decompression
# below would run in the wrong place.
cd wikipedia/ || exit 1
wget -N https://dumps.wikimedia.org/jawiki/latest/jawiki-latest-all-titles-in-ns0.gz
# -d decompress, -k keep the .gz (wget -N compares against it on the next
# run), -f overwrite an existing decompressed file.
# A missing or truncated download surfaces here as a gzip error.
gzip -dkf jawiki-latest-all-titles-in-ns0.gz || exit 1
cd - || exit 1


# ==============================================================================
# generate placenames and ZIP codes
# ==============================================================================

# get zip code data
echo "get zip code data..."
# Abort if the directory is missing; every following step uses relative paths.
cd chimei/ || exit 1
wget http://www.post.japanpost.jp/zipcode/dl/kogaki/zip/ken_all.zip
wget http://www.post.japanpost.jp/zipcode/dl/jigyosyo/zip/jigyosyo.zip
# unzip's status is not checked because it exits 1 on mere warnings; a failed
# download or extraction is caught when the Ruby steps below cannot read the
# CSV files.
unzip ken_all.zip
unzip jigyosyo.zip

# modify zip code data
echo "modify KEN_ALL.CSV..."
ruby modify-ken_all.rb KEN_ALL.CSV || exit 1

# generate zip code entries
echo "generate zip code entries..."
ruby generate-zipcode-ken_all.rb KEN_ALL.CSV.modzip || exit 1
ruby generate-zipcode-jigyosyo.rb JIGYOSYO.CSV || exit 1
# Concatenate every generated *.zipcode file into one cost list in the parent
# directory; fails (and aborts) when no *.zipcode file was produced.
cat *.zipcode > ../zipcode.costs || exit 1

# generate chimei (place name) dictionary
echo "generate chimei dictionary..."
ruby generate-chimei-dic.rb KEN_ALL.CSV.modzip || exit 1
cd .. || exit 1


# ==============================================================================
# generate neologd dictionary
# ==============================================================================

# get official mozcdic
# Concatenate all upstream dictionary*.txt files into a single mozcdic.txt.
cat "mozc-${MOZCVER}+dfsg/src/data/dictionary_oss/"dictionary*.txt > mozcdic.txt || exit 1

# get mecab-user-dict-seed
echo "get mecab-user-dict-seed..."
cd mecab-ipadic-neologd/ || exit 1
# -nc: do not download again when the file is already present. The status is
# not checked; a missing archive makes xz fail below.
wget -nc "${NEOLOGDURL}"

# extract mecab-user-dict-seed
echo "extract mecab-user-dict-seed..."
neologd_seed_csv="mecab-user-dict-seed.${NEOLOGDDATE}.csv"
# -d decompress, -k keep the .xz for later runs, -f overwrite a .csv left
# behind by an interrupted run (without -f xz refuses, and the stale file
# would be moved instead).
xz -dkf "${neologd_seed_csv}.xz" || exit 1
mv "${neologd_seed_csv}" .. || exit 1
cd .. || exit 1

# convert neologd format to mozc format
echo "convert neologd format to mozc format..."
seed_csv="mecab-user-dict-seed.${NEOLOGDDATE}.csv"
ruby convert-neologd-format-to-mozc-format.rb "${seed_csv}"

# Concatenate the chimei and neologd cost files into one list.
merged_costs="utdic.costs"
cat chimei/KEN_ALL.CSV.modzip.costs neologd.costs >"${merged_costs}"

# split new entries
echo "split new entries..."
ruby split-new-entries.rb "${merged_costs}"

# count entries in the wikipedia title list
# NOTE(review): count-entries.rb presumably writes the wikipedia/jawiki-*.count
# file that the cost-fixing step reads; confirm in the script.
echo "get wikipedia titles..."
# Abort if the directory is missing rather than running the script elsewhere.
cd wikipedia/ || exit 1
ruby count-entries.rb jawiki-latest-all-titles-in-ns0 || exit 1
cd - || exit 1

# fix jinmei costs and filter entries
echo "fix jinmei costs and filter entries..."

# Version suffix shared by both published dictionary files.
ut_version="${MOZCVER}.${NEOLOGDDATE}.${REVISION}"

# Wikipedia counts first, then the new entries, as one input file.
wiki_merged="utdic.costs.newentries.wikipedia"
cat wikipedia/jawiki-*.count utdic.costs.newentries >"${wiki_merged}"
ruby fix-costs-and-filter-entries.rb "${wiki_merged}"

# Publish the results one directory up under versioned names.
mv "${wiki_merged}.fixcosts" "../mozcdic-neologd-ut-${ut_version}.txt"
mv zipcode.costs "../mozcdic-zipcode-ut-${ut_version}.txt"


# ==============================================================================
# make release tarball
# ==============================================================================

# Stage the build scripts, NEologd documents and generated dictionaries in a
# scratch directory, pack it as ../../<release_name>.tar.bz2, then remove the
# scratch directory. Any failed staging step aborts so that an incomplete
# tarball is never produced.
release_name="mozcdic-neologd-ut-${NEOLOGDDATE}.${REVISION}"
release_tarball="../../${release_name}.tar.bz2"
neologd_raw_url="https://raw.githubusercontent.com/neologd/mecab-ipadic-neologd/master"

rm -rf "${release_name}/"
mkdir -p "${release_name}"/src/{chimei,mecab-ipadic-neologd,wikipedia} || exit 1

# Refresh the NEologd license and documentation files.
cd mecab-ipadic-neologd/ || exit 1
for neologd_doc in COPYING ChangeLog README.ja.md README.md; do
  # -f: treat HTTP errors as failures instead of saving the server's error
  # page under the document's name. A failed download only warns here; the
  # cp below aborts if the document is not present at all.
  curl -fO "${neologd_raw_url}/${neologd_doc}" ||
    echo "warning: failed to download ${neologd_doc}" >&2
done
cd - || exit 1

cp filter-ut.txt make-release-tarball.sh convert-neologd-*.rb fix-costs-*.rb split-new-*.rb \
  "${release_name}/src/" || exit 1
cp chimei/generate-*.rb chimei/modify-ken_all.rb "${release_name}/src/chimei/" || exit 1
cp wikipedia/count-entries.rb "${release_name}/src/wikipedia/" || exit 1
cp mecab-ipadic-neologd/{COPYING,ChangeLog,README.ja.md,README.md} \
  "${release_name}/src/mecab-ipadic-neologd/" || exit 1
# The glob part stays unquoted so it matches both generated dictionaries
# (neologd and zipcode) of this exact version.
cp ../{AUTHORS,ChangeLog,COPYING,PKGBUILD,README.md} \
  ../mozcdic-*-ut-"${MOZCVER}.${NEOLOGDDATE}.${REVISION}".txt \
  "${release_name}/" || exit 1

rm -f "${release_tarball}"
tar -jcf "${release_tarball}" "${release_name}/" || exit 1
rm -rf "${release_name}/"

