#escapeChars {return} #use lib '../lib'; use strict; use warnings; use HTML::Copy; use utf8; use File::Spec::Functions; #use Data::Dumper; use Test::More tests => 16; sub read_and_unlink { my ($path, $htmlcopy) = @_; open(my $in, "<".$htmlcopy->io_layer(), $path) or die "Can't open $path."; my $contents = do {local $/; <$in>}; close $in; unlink($path); return $contents; } ##== prepare linked HTML file my $linked_html = < EOT my $linked_file_name = "linked$$.html"; open(my $linked_fh, ">", $linked_file_name) or die "Can't open $linked_file_name."; print $linked_fh $linked_html; close $linked_fh; ##== HTML data without charsets my $source_html_nocharset = < ああ EOT my $result_html_nocharset = < ああ EOT ##== write test data my $sub_dir_name = "sub$$"; mkdir($sub_dir_name); my $src_file_name = "file$$.html"; my $destination = catfile($sub_dir_name, $src_file_name); ##== Test code with no charsets HTML open(my $src_fh, ">:utf8", $src_file_name) or die "Can't open $src_file_name."; print $src_fh $source_html_nocharset; close $src_fh; ##=== parse_to UTF-8 my $p = HTML::Copy->new($src_file_name); my $copy_html = $p->parse_to($destination); ok($copy_html eq $result_html_nocharset, "parse_to no charset UTF-8"); ##=== copty_to UTF8 $p->copy_to($destination); open(my $in, "<".$p->io_layer(), $destination) or die "Can't open $destination."; $copy_html = read_and_unlink($destination, $p); ok($copy_html eq $result_html_nocharset, "copy_to no charset UTF-8"); ##=== write data with shift_jis open($src_fh, ">:encoding(shiftjis)", $src_file_name) or die "Can't open $src_file_name."; print $src_fh $source_html_nocharset; close $src_fh; ##=== parse_to shift_jis $p = HTML::Copy->new($src_file_name); $copy_html = do { $p->encode_suspects("shiftjis"); $p->parse_to(catfile($sub_dir_name, $src_file_name)); }; ok($copy_html eq $result_html_nocharset, "parse_to no charset shift_jis"); ##=== copy_to shift_jis $copy_html = do { $p->copy_to($destination); open(my $in, "<".$p->io_layer, $destination) or die "Can't open $destination."; read_and_unlink($destination, $p); }; ok($copy_html eq $result_html_nocharset, "copy_to no charset shift_jis"); ##== HTML with charset uft-8 my $src_html_utf8 = < ああ EOT my $result_html_utf8 = < ああ EOT ##== Test code with charset utf-8 open($src_fh, ">:utf8", $src_file_name) or die "Can't open $src_file_name."; print $src_fh $src_html_utf8; close $src_fh; ##=== parse_to $p = HTML::Copy->new($src_file_name); $copy_html = $p->parse_to($destination); ok($copy_html eq $result_html_utf8, "parse_to charset UTF-8"); ##=== copy_to $p->copy_to($destination); $copy_html = read_and_unlink($destination, $p); ok($copy_html eq $result_html_utf8, "copy_to charset UTF-8"); ##=== copy_to gving a file handle $copy_html = do { open $in, "<", \$src_html_utf8; my $outdata ='';; my $p = HTML::Copy->new($in); open my $out, ">", $destination; $p->destination_path($destination); $p->copy_to($out); close $out; read_and_unlink($destination, $p); }; ok($copy_html eq $result_html_utf8, "copy_to giviing a file handle"); ##=== copy_to gving file handles for input and output $copy_html = do { open my $in, "<", \$src_html_utf8; my $outdata; my $p = HTML::Copy->new($in); open my $out, ">".$p->io_layer(), \$outdata; $p->destination_path($destination); $p->copy_to($out); Encode::decode($p->encoding, $outdata); }; ok($copy_html eq $result_html_utf8, "copy_to giviing file handles for input and output"); ##=== parse_to giving a file handle $copy_html = do { open my $in, "<", \$src_html_utf8; my $p = HTML::Copy->new($in); $p->parse_to($destination); }; ok($copy_html eq $result_html_utf8, "copy_to giviing file handles for input and output"); ##=== copy_to with directory destination $copy_html = do { my $p = HTML::Copy->new($src_file_name); my $destination = $p->copy_to($sub_dir_name); read_and_unlink($destination, $p); }; ok($copy_html eq $result_html_utf8, "copy_to with a directory destination"); ##== HTML with charset shift_jis my $src_html_shiftjis = < ああ EOT my $result_html_shiftjis = < ああ EOT ##== Test code with charset shift_jis open($src_fh, ">:encoding(shiftjis)", $src_file_name) or die "Can't open $src_file_name."; print $src_fh $src_html_shiftjis; close $src_fh; ##=== parse_to $p = HTML::Copy->new($src_file_name); $p->encode_suspects("shiftjis"); $copy_html = $p->parse_to($destination); ok($copy_html eq $result_html_shiftjis, "parse_to no charset shift_jis"); ##=== copy_to $p->copy_to($destination); $copy_html = read_and_unlink($destination, $p); ok($copy_html eq $result_html_shiftjis, "copy_to no charset shift_jis"); ##== class_methods $copy_html = HTML::Copy->parse_file($src_file_name, $destination); ok($copy_html eq $result_html_shiftjis, "parse_file"); HTML::Copy->htmlcopy($src_file_name, $destination); open($in, "<".$p->io_layer, $destination) or die "Can't open $destination."; {local $/; $copy_html = <$in>}; close $in; ok($copy_html eq $result_html_shiftjis, "htmlcopy"); unlink($destination); ##== Test with base url my $src_html_base = < ああ EOT ##== Test code with base url open($src_fh, ">:utf8", $src_file_name) or die "Can't open $destination."; print $src_fh $src_html_base; close $src_fh; ##=== parse_to $p = HTML::Copy->new($src_file_name); $copy_html = $p->parse_to($destination); ok($copy_html eq $src_html_base, "parse_to HTML with base URL"); ##=== copy_to $p->copy_to($destination); open($in, "<".$p->io_layer, $destination) or die "Can't open $destination."; {local $/; $copy_html = <$in>}; close $in; ok($copy_html eq $src_html_base, "copy_to HTML with base URL"); unlink($destination); unlink($linked_file_name, $src_file_name, $destination); rmdir($sub_dir_name);