# ---------------------------------------------------------------- # this test script is written in utf8 but does not "use utf8" for 5.005-compatibility # ---------------------------------------------------------------- use strict; use Test::More; # ---------------------------------------------------------------- { local $@; eval { require 5.008001; }; plan skip_all => 'Perl 5.8.1 is required.' if $@; } # ---------------------------------------------------------------- { plan tests => 155; use_ok('XML::TreePP'); &test_main(); } # ---------------------------------------------------------------- sub test_main { my $src = {}; $src->{Plain} = <<"EOT"; AA zz ©© ëë んん 漢漢 EOT $src->{XMLref} = <<"EOT"; AA zz ©© ëë んん 漢漢 EOT $src->{Mixed} = <<"EOT"; AA zz ©© ëë んん 漢漢 EOT foreach my $key ( keys %$src ) { phase2( "$key octets", $src->{$key} ); my $copy = $src->{$key}; utf8::decode( $copy ); next unless utf8::is_utf8($copy); phase2( "$key string", $copy ); } } # ---------------------------------------------------------------- sub phase2 { my $subject = shift; my $srcxml = shift; my $srcutf8 = utf8::is_utf8($srcxml); my $srcref = ( $srcxml =~ /&#\w+;/ ); foreach my $utf8_flag ( 0, 1 ) { my $subj2 = $subject .( $utf8_flag ? ' utf8_flag' : '' ); foreach my $xml_deref ( 0, 1 ) { my $subj3 = $subj2 .( $xml_deref ? ' xml_deref' : '' ); my $opt = { utf8_flag => $utf8_flag, xml_deref => $xml_deref, }; my $tpp = XML::TreePP->new( %$opt ); my $tree = $tpp->parse( $srcxml ); if ( $xml_deref ) { if ( $srcutf8 || $utf8_flag ) { check_string( $subj3, $tree ); } else { check_octets( $subj3, $tree ); } } else { if ( $srcref ) { check_has_ref( $subj3, $tree ); } else { check_no_ref( $subj3, $tree ); } } } } } # ---------------------------------------------------------------- sub check_has_ref { my $subject = shift; my $tree = shift; my $root = $tree->{root}; my $HAS_REF = qr/&#\w+;/; # \x00-\x7F is always dereferenced. # like( $root->{a}, $HAS_REF, "[$subject] has_ref: a" ); # like( $root->{z}, $HAS_REF, "[$subject] has_ref: z" ); like( $root->{c}, $HAS_REF, "[$subject] has_ref: c" ); like( $root->{e}, $HAS_REF, "[$subject] has_ref: e" ); like( $root->{n}, $HAS_REF, "[$subject] has_ref: n" ); like( $root->{k}, $HAS_REF, "[$subject] has_ref: k" ); } # ---------------------------------------------------------------- sub check_no_ref { my $subject = shift; my $tree = shift; my $root = $tree->{root}; my $HAS_REF = qr/&#\w+;/; unlike( $root->{a}, $HAS_REF, "[$subject] no_ref: a" ); unlike( $root->{z}, $HAS_REF, "[$subject] no_ref: z" ); unlike( $root->{c}, $HAS_REF, "[$subject] no_ref: c" ); unlike( $root->{e}, $HAS_REF, "[$subject] no_ref: e" ); unlike( $root->{n}, $HAS_REF, "[$subject] no_ref: n" ); unlike( $root->{k}, $HAS_REF, "[$subject] no_ref: k" ); } # ---------------------------------------------------------------- sub check_string { my $subject = shift; my $tree = shift; my $root = $tree->{root}; # \x00-\x7F never have utf8 flag # ok( utf8::is_utf8($root->{a}), "[$subject] is_utf8: a" ); # ok( utf8::is_utf8($root->{z}), "[$subject] is_utf8: z" ); ok( utf8::is_utf8($root->{c}), "[$subject] is_utf8: c" ); ok( utf8::is_utf8($root->{e}), "[$subject] is_utf8: e" ); ok( utf8::is_utf8($root->{n}), "[$subject] is_utf8: n" ); ok( utf8::is_utf8($root->{k}), "[$subject] is_utf8: k" ); is( $root->{a}, chr(0x0041) x 2, "[$subject] ok: a" ); is( $root->{z}, chr(0x007A) x 2, "[$subject] ok: z" ); is( $root->{c}, chr(0x00A9) x 2, "[$subject] ok: c" ); is( $root->{e}, chr(0x00EB) x 2, "[$subject] ok: e" ); is( $root->{n}, chr(0x3093) x 2, "[$subject] ok: n" ); is( $root->{k}, chr(0x6F22) x 2, "[$subject] ok: k" ); } # ---------------------------------------------------------------- sub check_octets { my $subject = shift; my $tree = shift; my $root = $tree->{root}; ok( ! utf8::is_utf8($root->{a}), "[$subject] is_octets: a" ); ok( ! utf8::is_utf8($root->{z}), "[$subject] is_octets: z" ); ok( ! utf8::is_utf8($root->{c}), "[$subject] is_octets: c" ); ok( ! utf8::is_utf8($root->{e}), "[$subject] is_octets: e" ); ok( ! utf8::is_utf8($root->{n}), "[$subject] is_octets: n" ); ok( ! utf8::is_utf8($root->{k}), "[$subject] is_octets: k" ); is( $root->{a}, 'AA', "[$subject] ok: a" ); is( $root->{z}, 'zz', "[$subject] ok: z" ); is( $root->{c}, '©©', "[$subject] ok: c" ); is( $root->{e}, 'ëë', "[$subject] ok: e" ); is( $root->{n}, 'んん', "[$subject] ok: n" ); is( $root->{k}, '漢漢', "[$subject] ok: k" ); } # ---------------------------------------------------------------- ;1; # ----------------------------------------------------------------