#!/usr/bin/perl
use strict('vars');
use Switch;
use Encode;
use LWP::Simple;
use HTML::Parser ();
use lib './lib';
use CSSJ::Driver("create_driver_for");

#--- settings
# Connection defaults for the CopperPDF server; each can be overridden with
# the matching -cp_* command-line switch (see parse_option).
my $copper_IP       = "127.0.0.1";
my $copper_PORT     = 8099;
my $copper_USER     = "user";
my $copper_PASSWORD = "kappa";

#--- begin
# Values filled in from the command line by parse_option().
my $list_url;    # URL of the regulations collection to crawl
my $in_HTML;     # existing HTML file to convert (when not crawling)
my $out_HTML;    # name of the HTML file to write
my $img_path;    # directory in which downloaded images are stored
my $out_PDF;     # name of the PDF file to write

# parse_option() returns the number of problems it found; any problem means
# the usage text is shown and nothing else is done.
if ( parse_option() ) {
	print "
Usage: reiki.pl [-url URL] [-in_html IN_HTML] [-out_html OUT_HTML] [-pdf OUT_PDF] [-cp_host HOST] [-cp_port PORT] [-cp_user USER] [-cp_password PASSWORD]
  URL(in)       収集する例規集のトップページ(/reiki_menu.htmlで終わる)URL
  IN_HTML(in)   指定されたHTMLファイルをもとにPDFを生成
  OUT_HTML(out) 出力するHTMLファイル名
  OUT_PDF(out)  出力するPDFファイル名
  HOST(in)      CopperPDFサーバが起動しているホスト名
  PORT(in)      CopperPDFサーバのポート番号
  USER(in)      CopperPDFサーバのユーザー名
  PASSWORD(in)  CopperPDFサーバのパスワード
";
	exit;
}

# With -in_html the input file also serves as the "output" HTML name, so the
# image directory below is derived from its location.
if ( !$out_HTML && $in_HTML ) {
	$out_HTML = $in_HTML;
}

# With only -pdf given, name the HTML after the PDF (foo.pdf -> foo.html).
if ( !$out_HTML && $out_PDF ) {

	# Remove only a real extension on the last path component. The previous
	# substr/rindex form chopped the final character when the name contained
	# no '.' at all.
	( my $pdf_stem = $out_PDF ) =~ s/\.[^.\/\\]*$//;
	$out_HTML = $pdf_stem . ".html";
}

# Images live in an img/ directory next to the HTML file. Relative names get
# a "./" prefix so a bare file name still has a directory part; absolute
# names must not, or "/a/b.html" would turn into the relative ".//a/img/".
# (This assigns the $img_path declared above instead of declaring a second,
# masking variable.)
my $html_location = ( $out_HTML =~ m{^/} ) ? $out_HTML : "./" . $out_HTML;
$img_path =
  substr( $html_location, 0, rindex( $html_location, '/' ) ) . "/img/";
if ( !-d $img_path ) {
	print "create img path '$img_path'.\r\n";

	# Keep going on failure (as before), but say why images will be missing.
	mkdir $img_path
	  or print STDERR "cannot create img path '$img_path': $!\r\n";
}

# Crawl state shared between the HTML::Parser callbacks below.
my ( $list_path, $list_html );    # directory part / file part of $list_url
my ( $title_hen, $title_syo, $title_name );    # current part, chapter, title
my ( $new_hen,   $new_syo, $new_title );       # last headings written out
my ( $href,      $addpath );                   # current link, its directory
my $get_content_flag = 0;                      # true while body is copied
my ( $get_enkaku_flag, $enkaku, $enkaku_cnt ); # revision-history collection
my $seiteino_flag = 0;    # set when the revision history should be inserted
my $yousiki_flag  = 0;    # set when tables (forms) should be omitted
my @images;               # src values of the images seen so far
my $p;                    # parser used for the menu / table-of-contents pages

# Crawl mode: build the HTML first, then treat it as the PDF input.
if ($list_url) {
	GetURL2HTML();
	$in_HTML = $out_HTML;
}

if ($out_PDF) {
	print "PDF Creating....\r\n";
	pdf_output($in_HTML);
	print "PDF Create completed.\r\n\r\n";
	exit;
}

#------------------------------------------------------------------------------
#      parse option
# Parse @ARGV into the file-level option variables and validate them.
#
# Recognised switches, each followed by its value:
#   -url -in_html -out_html -pdf -cp_host -cp_port -cp_user -cp_password
#
# Every problem is reported on standard output. Returns the number of
# problems found, i.e. 0 when the command line is usable.
sub parse_option {

	my $err = 0;

	# Switch name => variable that receives the switch's value.
	my %target_for = (
		'-url'         => \$list_url,
		'-out_html'    => \$out_HTML,
		'-in_html'     => \$in_HTML,
		'-pdf'         => \$out_PDF,
		'-cp_host'     => \$copper_IP,
		'-cp_port'     => \$copper_PORT,
		'-cp_user'     => \$copper_USER,
		'-cp_password' => \$copper_PASSWORD,
	);

	for ( my $i = 0 ; $i < @ARGV ; $i++ ) {
		my $name   = $ARGV[$i];
		my $target = $target_for{$name};

		if ( !$target ) {
			print "invalid switch '" . $name . "'\r\n";
			$err++;
			next;
		}

		# A switch in last position has no value. Storing undef here would
		# silently wipe a default such as the CopperPDF host, so report it.
		if ( $i + 1 >= @ARGV ) {
			print "missing value for switch '" . $name . "'\r\n";
			$err++;
			next;
		}

		$$target = $ARGV[ ++$i ];
	}

	# Exactly one input source is required.
	if ( $list_url && $in_HTML ) {
		print "入力を -url と -in_html を同時指定できません\r\n";
		$err++;
	}
	if ( !$list_url && !$in_HTML ) {
		print "入力を -url か -in_html で、指定して下さい\r\n";
		$err++;
	}

	# At least one output is required.
	if ( !$out_PDF && !$out_HTML ) {
		print "出力を -pdf か -out_html で、指定して下さい\r\n";
		$err++;
	}

	# HTML in, HTML out is rejected.
	if ( $in_HTML && $out_HTML ) {
		print
"入力を -in_html 出力を -out_html は、実行されません\r\n";
		$err++;
	}
	return $err;
}

#------------------------------------------------------------------------------
#      URLからhtmlを作成
# Crawl the site starting at $list_url and write the combined document to
# $out_HTML through the DIST_FP handle, which the parser callbacks also
# print to.
#
# Three pages are fetched in turn; the first two callbacks repoint $list_url
# at the next page to fetch:
#   1. the menu page (menu_text / menu_html),
#   2. the frameset of the table of contents (mokuji_html),
#   3. the table of contents itself (hen_html), which drives the rest.
#
# Dies when the output file cannot be written or one of these three pages
# cannot be downloaded.
sub GetURL2HTML {

	# Table of contents

	open( DIST_FP, '>', $out_HTML )
	  or die "cannot write '$out_HTML': $!\n";
	print DIST_FP '<!DOCTYPE HTML PUBLIC -//IETF//DTD HTML//EN>
	<html>
	<head>
	<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
	<link rel="stylesheet" href="reiki.css" type="text/css" />
	</head>
	<body>
	<div id="nombre-left"></div>
	<div id="nombre-right"></div>
	';

	# Step 1: from the menu, pick up the link to the table of contents.
	$p = HTML::Parser->new(
		api_version     => 3,
		start_h         => [ \&menu_html, "self ,tagname, attr ,text" ],
		text_h          => [ \&menu_text, "text" ],
		marked_sections => 1,
	);
	$p->parse( _fetch_sjis_page($list_url) );

	# Step 2: fetch the table-of-contents frameset.
	$p = HTML::Parser->new(
		api_version     => 3,
		start_h         => [ \&mokuji_html, "self ,tagname, attr ,text" ],
		marked_sections => 1,
	);
	$p->parse( _fetch_sjis_page($list_url) );

	# Links inside the table of contents are resolved against its directory.
	$list_path = substr( $list_url, 0, rindex( $list_url, '/' ) + 1 );
	$list_html = substr( $list_url, rindex( $list_url, '/' ) + 1 );

	# Step 3: walk the table of contents.
	$p = HTML::Parser->new(
		api_version     => 3,
		start_h         => [ \&hen_html, "self ,tagname, attr ,text" ],
		marked_sections => 1,
	);
	$p->parse( _fetch_sjis_page($list_url) );

	print DIST_FP '</body></html>';
	close(DIST_FP)
	  or die "cannot finish writing '$out_HTML': $!\n";
	print "\r\nHTML Complete.\r\n";
	print "CSS Complete.\r\n";
}

# Download $url and return its content converted from Shift_JIS to UTF-8.
# Dies when the download fails: without the page the crawl would continue
# with an unchanged $list_url and fetch the wrong document.
sub _fetch_sjis_page {
	my ($url) = @_;
	my $raw = get($url);
	die "cannot fetch '$url'\n" unless defined $raw;
	return encode( 'utf-8', decode( 'shiftjis', $raw ) );
}

#------------------------------------------------------------------------------
#      "メニュー" html
# Start-tag handler for the menu page.
#
# On the first <a> element that carries an href, repoints $list_url at that
# link (resolved against the directory of the current $list_url) and ends
# the parse. Anchors without an href, such as <a name="...">, are skipped;
# accepting one would leave $list_url pointing at the bare directory.
#
# Arguments: the parser, the tag name, the attribute hash ref and the tag's
# source text (unused).
sub menu_html {
	my ( $self, $tagname, $attr, $text ) = @_;
	return unless $tagname eq "a" && defined $attr->{href};

	my $base = substr( $list_url, 0, rindex( $list_url, '/' ) + 1 );
	$list_url = $base . $attr->{href};

	# Stop the parser that invoked this handler.
	$self->eof;
}

#------------------------------------------------------------------------------
#      "メニュー" text
# Text handler for the menu page: a text chunk containing "例規集" is written
# to the output HTML wrapped in <div class="page_title">.
sub menu_text {
	my ($chunk) = @_;
	return unless $chunk =~ /例規集/;
	print DIST_FP '<div class="page_title">', $chunk, "</div>";
}

#------------------------------------------------------------------------------
#      "もくじ" html
# Start-tag handler for the table-of-contents frameset: when the frame named
# "left" is found, repoints $list_url at its src (resolved against the
# directory of the current $list_url) and ends the parse held in $p.
sub mokuji_html {
	my ( $self, $tagname, $attr, $text ) = @_;
	return if $tagname ne "frame";
	return if $attr->{name} ne "left";

	my $dir_length = rindex( $list_url, '/' ) + 1;
	$list_url = substr( $list_url, 0, $dir_length ) . $attr->{src};
	$p->eof;
}

#------------------------------------------------------------------------------
#      "編" html
# Start-tag handler for the table of contents: every <a target="main"> link
# is downloaded (relative to $list_path), converted from Shift_JIS to UTF-8
# and parsed with the chapter-level handlers syo_html / syo_hend / syo_text.
sub hen_html {
	my ( $self, $tagname, $attr, $text ) = @_;
	return unless $tagname eq "a" && $attr->{target} eq "main";

	my $page_url = $list_path . $attr->{href};
	my $utf8_page = encode( 'utf-8', decode( 'shiftjis', get($page_url) ) );

	# A separate parser, so the one walking the table of contents is not
	# disturbed.
	my $chapter_parser = HTML::Parser->new(
		api_version     => 3,
		start_h         => [ \&syo_html, "tagname, attr ,text" ],
		end_h           => [ \&syo_hend, "tagname ,attr ,text" ],
		text_h          => [ \&syo_text, "text" ],
		marked_sections => 1,
	);
	$chapter_parser->parse($utf8_page);
}

#------------------------------------------------------------------------------
#      "章" html
# Start-tag handler for a chapter list: for each <a>, remembers its href in
# $href and the directory part of that href (everything up to and including
# the last '/', or undef when there is none) in $addpath.
sub syo_html {
	my ( $tagname, $attr, $text ) = @_;
	return if $tagname ne "a";

	$href = $attr->{href};

	# The pattern always matches; in list context it yields its one capture.
	($addpath) = $href =~ /(.*\/)*/;
}

#------------------------------------------------------------------------------
#      "章" /html
# End-tag handler for a chapter list: at each </a>, prints a progress line
# with the current part > chapter > title, then downloads the page named by
# $href (set by syo_html) and parses it with page_html.
sub syo_hend {
	my ( $tagname, $attr, $text ) = @_;
	return unless $tagname eq "a";

	print join( ">", "#$title_hen", $title_syo, $title_name ), "\r\n";

	my $page_url = $list_path . $href;
	my $utf8_page = encode( 'utf-8', decode( 'shiftjis', get($page_url) ) );
	my $page_parser = HTML::Parser->new(
		api_version     => 3,
		start_h         => [ \&page_html, "tagname, attr ,text" ],
		marked_sections => 1,
	);
	$page_parser->parse($utf8_page);
}

#------------------------------------------------------------------------------
#      "章" text
# Text handler for a chapter list. Classifies each text chunk longer than
# two bytes and stores it in the matching file-level variable:
#   "第N編..." -> $title_hen  (part; also prints a separator line)
#   "第N章..." -> $title_syo  (chapter)
#   other     -> $title_name (title of the regulation)
# The first run of two ideographic spaces in a part/chapter label is removed.
sub syo_text {
	my ($text) = @_;
	return if length($text) <= 2;

	# Read the captures only after a successful match, so values left over
	# from an earlier match can never be picked up.
	if ( $text =~ /(第[0-9]+)(編|章)(.*)/ ) {
		my ( $number, $kind, $label ) = ( $1, $2, $3 );
		$label =~ s/　　//;

		if ( $kind eq "編" ) {
			print "------------------------------------------------------------------------------\r\n";

			# Assign the file-level $title_hen. Declaring it with "my" here
			# created a throw-away local, so content_html never saw a part
			# title and never wrote an <h1>.
			$title_hen = $number . $kind . $label;

#		if($title_hen eq "第1編　総規") { $p->eof; }	# debugging aid: output part 1 only, then stop
			return;
		}

		# The pattern admits only 編 or 章, so this is a chapter.
		$title_syo = $number . $kind . $label;
		return;
	}

	$title_name = $text;
}

#------------------------------------------------------------------------------
#      "頁" html
# Start-tag handler for a regulation's frameset page.
#
#   frame "left": after rewriting "enkaku/mc" to "enkaku/kc" in its src, the
#                 frame is downloaded and fed to enkaku_text, which builds
#                 the revision-history table in $enkaku.
#   frame "main": the frame is downloaded and fed to the content_* handlers,
#                 which copy the body into the output HTML.
sub page_html {
	my ( $tagname, $attr, $text ) = @_;
	return unless $tagname eq "frame";

	my $frame_name = $attr->{name};

	if ( $frame_name eq "left" ) {
		$attr->{src} =~ s/enkaku\/mc/enkaku\/kc/;
		my $history_url = $list_path . $addpath . $attr->{src};

		# Reset the collector state read and updated by enkaku_text.
		$get_enkaku_flag = 0;
		$enkaku_cnt      = -2;
		$enkaku          = "<table class=\"kaisei\"><tr><th>改正</th>";

		my $utf8_page =
		  encode( 'utf-8', decode( 'shiftjis', get($history_url) ) );
		my $history_parser = HTML::Parser->new(
			api_version     => 3,
			text_h          => [ \&enkaku_text, "text" ],
			marked_sections => 1,
		);
		$history_parser->parse($utf8_page);

		$enkaku .= '</tr></table><br clear="all" />';

		# NOTE(review): 72 looks like the byte length of a table with no
		# history cells, in which case a plain line break is used - confirm.
		$enkaku = "<br />" if length($enkaku) <= 72;
	}
	elsif ( $frame_name eq "main" ) {
		my $body_url = $list_path . $addpath . $attr->{src};
		my $utf8_page =
		  encode( 'utf-8', decode( 'shiftjis', get($body_url) ) );
		my $body_parser = HTML::Parser->new(
			api_version     => 3,
			start_h         => [ \&content_html, "tagname, attr ,text" ],
			end_h           => [ \&content_hend, "tagname ,attr ,text" ],
			text_h          => [ \&content_text, "text" ],
			marked_sections => 1,
		);
		$body_parser->parse($utf8_page);
	}
}

#------------------------------------------------------------------------------
#      "沿革" text
# Text handler for the revision-history frame; appends table cells to
# $enkaku.
#
# Chunks of four bytes or fewer are ignored. Collection starts after the
# chunk "沿革情報". While $enkaku_cnt is negative (page_html starts it at
# -2) chunks are counted but not stored. From then on chunks are laid out
# four per table row: the first two share a <td class="left"> cell, the next
# two share a plain <td> cell, and after the fourth a new row is opened.
# Every row except the first begins with an empty cell.
sub enkaku_text {
	my ($text) = @_;
	return if length($text) <= 4;

	if ( $text eq "沿革情報" ) {
		$get_enkaku_flag = 1;
		return;
	}
	return unless $get_enkaku_flag;

	# Leading chunks to skip.
	if ( $enkaku_cnt < 0 ) {
		$enkaku_cnt++;
		return;
	}

	my $column = $enkaku_cnt % 4;    # position inside the current row

	if ( $column == 0 ) {
		$enkaku .= '<td></td>' if $enkaku_cnt != 0;
		$enkaku .= '<td class="left">';
	}
	elsif ( $column == 2 ) {
		$enkaku .= "<td>";
	}

	$enkaku .= $text;

	$enkaku .= "</td>"     if $column == 1 || $column == 3;
	$enkaku .= "</tr><tr>" if $column == 3;

	$enkaku_cnt++;
}

#------------------------------------------------------------------------------
#      "本文" html
# Start-tag handler for a regulation's body page.
#
#   <body>               starts copying; writes the <h1>/<h2>/<h3> headings
#                        whenever part, chapter or title changed since the
#                        last page, and opens <div class="reiki">.
#   <div class=sagari1>  sets $yousiki_flag; from then on each <table> is
#                        replaced by an "(省略)" placeholder and copying is
#                        suspended (content_hend resumes it at </table>).
#   <div class=seiteino> asks content_hend to insert the revision history
#                        at the next </div>.
#   <img>                downloads the image into $img_path and re-emits the
#                        tag from its attributes.
# Any other tag is copied verbatim while copying is active.
#
# NOTE(review): nothing in this file resets $yousiki_flag, so once set it
# stays set for every following page - confirm this is intended.
sub content_html {
	my ( $tagname, $attr, $text ) = @_;
	if ( $tagname eq "body" ) {
		$get_content_flag = 1;
		if ( $new_hen ne $title_hen ) {
			$new_hen = $title_hen;
			print DIST_FP "<h1>$title_hen</h1>";
			print DIST_FP '<div class="reiki">';
			print DIST_FP '
			<div id="header-left"><span class="h1"></span><span class="h2"><img src="lb.svgz" class="lb"/><img src="rb.svgz" class="rb"/></span></div>
			<div id="header-right"><span class="h1"></span><span class="h2"><img src="lb.svgz" class="lb"/><img src="rb.svgz" class="rb"/></span></div>
			';
		}
		else {
			print DIST_FP '<div class="reiki">';
		}

		if ( $new_syo ne $title_syo ) {
			$new_syo = $title_syo;
			print DIST_FP "<h2>$title_syo</h2>";
		}
		if ( $new_title ne $title_name ) {
			$new_title = $title_name;
			print DIST_FP "<h3>$title_name</h3>";
		}
		return;
	}
	if ( $tagname eq "div" && $attr->{class} eq "sagari1" ) {
		$yousiki_flag = 1;
	}
	if ( $tagname eq "table" && $yousiki_flag ) {
		print DIST_FP "<div class=\"syouryaku\">(省略)</div>";
		$get_content_flag = 0;
	}
	if ( $tagname eq "div" && $attr->{class} eq "seiteino" ) {
		$seiteino_flag = 1;
	}
	if ( $tagname eq "img" ) {

		# Drop a leading "./" only. Unanchored, the pattern also matched
		# inside "../" and turned "../x.gif" into ".x.gif".
		$attr->{src} =~ s/^\.\///;

		# File name = part after the last '/'. rindex returns -1 when there
		# is no '/', so compare explicitly instead of testing for truth.
		my $fname      = $attr->{src};
		my $last_slash = rindex( $fname, "/" );
		if ( $last_slash >= 0 ) {
			$fname = substr( $fname, $last_slash + 1 );
		}

		my $img_file = $img_path . $fname;
		my $img_url  = $list_path . $addpath . $attr->{src};
		print "fwrite " . $img_file . "\r\n";

		# Store exactly the downloaded bytes. The stray literal "image" that
		# used to be appended to every file is no longer written.
		my $img_data = get($img_url);
		if ( !defined $img_data ) {
			print STDERR "cannot fetch image '$img_url'\r\n";
		}
		elsif ( open( my $img_fh, '>', $img_file ) ) {
			binmode($img_fh);
			print {$img_fh} $img_data;
			close($img_fh)
			  or print STDERR "cannot finish writing '$img_file': $!\r\n";
		}
		else {
			print STDERR "cannot write '$img_file': $!\r\n";
		}

		push( @images, $attr->{src} );

		# Rebuild the tag so the rewritten src is the one that is emitted.
		$text = "<img";
		foreach my $at ( keys %$attr ) {
			$text .= " $at=\"$attr->{$at}\"";
		}
		$text .= ">";
	}
	if ( $get_content_flag == 1 ) {
		print DIST_FP $text;
	}
}

#------------------------------------------------------------------------------
#      "本文" /html
# End-tag handler for a regulation's body page.
#
#   </body>  stops copying and closes the wrapper div.
#   other    the tag is copied while copying is active; after that a
#            </table> resumes copying when tables are being omitted, and a
#            </div> following a "seiteino" div is followed by the revision
#            history held in $enkaku.
sub content_hend {
	my ( $tagname, $attr, $text ) = @_;

	if ( $tagname eq "body" ) {
		$get_content_flag = 0;
		print DIST_FP "</DIV>";
		return;
	}

	# Copy first: the </table> of an omitted table must not be written, so
	# copying is switched back on only afterwards.
	print DIST_FP $text if $get_content_flag == 1;
	$get_content_flag = 1 if $yousiki_flag && $tagname eq "table";

	if ( $seiteino_flag && $tagname eq "div" ) {
		print DIST_FP $enkaku;
		$seiteino_flag = 0;
	}
}

#------------------------------------------------------------------------------
#      "本文" text
# Text handler for a regulation's body page. Chunks of two bytes or fewer
# are ignored. A $yousiki_flag of exactly 1 is bumped to 2 by the first
# longer chunk. While copying is active, an article heading ("第N条　") or
# a supplementary-provisions heading ("附　則") at the start of the chunk is
# wrapped in a <span>, and the chunk is written to the output HTML.
sub content_text {
	my ($text) = @_;
	return if length($text) <= 2;

	$yousiki_flag++ if $yousiki_flag == 1;

	return unless $get_content_flag == 1;

	for ($text) {
		s/^(第[0-9]+条　)/<span class="jou_head">$1<\/span>/;
		s/^(附　則)/<span class="fusoku_head">$1<\/span>/;
	}
	print DIST_FP $text;
}

#------------------------------------------------------------------------------
#      PDF 出力
# Render $content_file (an HTML file) to the PDF named by $out_PDF through
# the CopperPDF server configured in $copper_*.
#
# Every plain file in resources/ and in $img_path is sent to the session as
# a resource before the main document. The page size is 148mm x 210mm with
# PDF bookmarks on and compression off.
#
# Prints a message and exits when no session can be created; dies when the
# PDF file cannot be opened for writing. Unreadable inputs are reported on
# STDERR and skipped.
sub pdf_output {
	my ($content_file) = @_;

	# INIT
	my $driver = create_driver_for( $copper_IP, $copper_PORT, "utf-8" );
	my $session = $driver->create_session( $copper_USER, $copper_PASSWORD );
	if ( !$session ) {
		print STDERR "CopperPDFサーバが見つかりません\r\n";
		exit;
	}

	# Open the output only once the server is known to be reachable, so a
	# failed connection does not truncate an existing PDF.
	open( FP, '>', $out_PDF )
	  or die "cannot write '$out_PDF': $!\n";
	binmode(FP);

	$session->set_output(FP);
	$session->set_error_func(
		sub { print STDERR "Msg" . $_[0] . ">" . $_[1] . "\n"; } );
	$session->set_property( 'output.pdf.compression', 'none' );
	$session->set_property( 'output.pdf.bookmarks',   'true' );
	$session->set_property( 'output.page-width',      '148mm' );
	$session->set_property( 'output.page-height',     '210mm' );

	# RESOURCES
	my $rsrc_path = 'resources/';
	if ( opendir( my $rsrc_dh, $rsrc_path ) ) {
		while ( defined( my $rsrc_file = readdir $rsrc_dh ) ) {
			next if $rsrc_file =~ /^\.{1,2}$/;    # skip '.' and '..'
			next unless -f $rsrc_path . $rsrc_file;    # plain files only

			# Only SVG needs an explicit type here; others are sent without.
			my $mime_type = undef;
			if ( $rsrc_file =~ /\.svg$/ || $rsrc_file =~ /\.svgz$/ ) {
				$mime_type = 'image/svg+xml';
			}
			$session->start_resource( STDOUT, 'file:' . $rsrc_file,
				$mime_type );
			binmode(STDOUT);
			_copy_file_to_stdout( $rsrc_path . $rsrc_file, 1 )
			  or print STDERR "cannot read '$rsrc_path$rsrc_file': $!\r\n";
			$session->end_resource(STDOUT);
		}
		closedir $rsrc_dh;
	}
	else {
		print STDERR "cannot read directory '$rsrc_path': $!\r\n";
	}

	# IMAGES
	if ( opendir( my $img_dh, $img_path ) ) {
		while ( defined( my $img_file = readdir $img_dh ) ) {
			next if $img_file =~ /^\.{1,2}$/;    # skip '.' and '..'
			next unless -f $img_path . $img_file;    # plain files only
			$session->start_resource( STDOUT, 'file:img/' . $img_file );
			binmode(STDOUT);
			_copy_file_to_stdout( $img_path . $img_file, 1 )
			  or print STDERR "cannot read '$img_path$img_file': $!\r\n";
			$session->end_resource(STDOUT);
		}
		closedir $img_dh;
	}
	else {
		print STDERR "cannot read directory '$img_path': $!\r\n";
	}

	# HTML
	$session->start_main( STDOUT, 'file:reiki.html' );
	_copy_file_to_stdout( $content_file, 0 )
	  or print STDERR "cannot read '$content_file': $!\r\n";
	$session->end_main(STDOUT);

	$session->close();
	close(FP)
	  or print STDERR "cannot finish writing '$out_PDF': $!\r\n";

}

# Copy the file at $path to the currently selected output handle.
# NOTE(review): presumably the CSSJ session captures STDOUT between its
# start_* and end_* calls - confirm against the driver.
# With a true $binary the input is read in binary mode, so image and
# compressed SVG bytes are not altered on platforms that translate line
# endings. Returns 1 on success, 0 when the file cannot be opened ($! set).
sub _copy_file_to_stdout {
	my ( $path, $binary ) = @_;
	open( my $in, '<', $path ) or return 0;
	binmode($in) if $binary;
	local $_;
	while (<$in>) { print; }
	close($in);
	return 1;
}
