# Test script for the three result serialisers of an XML::LibXSLT stylesheet
# object: output_string(), output_as_chars() and output_as_bytes().
# Encode::is_utf8() is used to tell whether each returned scalar carries
# Perl's UTF-8 (character string) flag or not.
#
# Plan: PLAN (32) tests, which matches the number of ok(...) calls below.
# If Encode cannot be loaded, skip() is called once per planned test and the
# script exits.
#
# Scenarios, as far as the visible statements show:
#   1. Document containing U+0100, stylesheet whose output format is UTF-8
#      (per the inline comment): output_string() and output_as_chars() must
#      be flagged as UTF-8 and equal "\x{0100}dam"; output_as_bytes() must
#      not be flagged and must equal "\xC4\x80dam".
#   2. Document containing U+017E, stylesheet with no encoding (per the
#      inline comment): output_string() and output_as_bytes() must not be
#      flagged and must match the two-alternative regex; output_as_chars()
#      must be flagged and equal "\x{17E}il".
#   3. Same document, stylesheet whose output cannot represent the character
#      (per the inline comment it "will appear as an entity"): same checks
#      as scenario 2 but with exact string comparisons.
#
# NOTE(review): the line breaks of this file appear to have been collapsed.
#   As laid out, everything after the first '#' on the line starting with
#   "use strict;" is a comment to Perl, so most statements never execute.
# NOTE(review): the heredoc bodies passed to parse_string() (the <<XML and
#   <<XSLT documents) look truncated -- only "parse_string(<" and the
#   terminator words remain. Restore them from the upstream test before
#   running.
# NOTE(review): the literal "ž" in the expected strings and regexes is
#   presumably a decoded numeric character reference for U+017E -- confirm
#   against the upstream test.
use strict; # -*- perl -*- use Test; use constant PLAN => 32; BEGIN { plan tests => PLAN; unless (eval { require Encode; import Encode; 1 }) { skip("this test requires Encode.pm\n") for (1..PLAN); exit; } } use XML::LibXSLT; use XML::LibXML; my $parser = XML::LibXML->new(); ok( $parser ); my $xslt = XML::LibXSLT->new(); { # U+0100 == LATIN CAPITAL LETTER A WITH MACRON my $doc = $parser->parse_string(<\x{0100}dam XML ok( $doc ); my $style_doc = $parser->parse_string(< XSLT ok( $style_doc ); my $stylesheet = $xslt->parse_stylesheet($style_doc); ok( $stylesheet ); my $results = $stylesheet->transform($doc); ok( $results ); my $output = $stylesheet->output_string( $results ); ok( $output ); # Test that we've correctly converted to characters seeing as the # output format was UTF-8. ok( Encode::is_utf8($output) ); ok( $output, "\x{0100}dam" ); $output = $stylesheet->output_as_chars( $results ); ok( Encode::is_utf8($output) ); ok( $output, "\x{0100}dam" ); $output = $stylesheet->output_as_bytes( $results ); ok( !Encode::is_utf8($output) ); ok( $output, "\xC4\x80dam" ); } # LATIN-2 character 17E - z caron my $doc = $parser->parse_string(< \x{17E}il XML ok( $doc ); # no encoding: libxslt chooses either an entity or UTF-8 { my $style_doc = $parser->parse_string(< XSLT ok( $style_doc ); my $stylesheet = $xslt->parse_stylesheet($style_doc); ok( $stylesheet ); my $results = $stylesheet->transform($doc); ok( $results ); my $output = $stylesheet->output_string( $results ); ok( !Encode::is_utf8($output) ); ok( $output =~ /^(?:ž|\xC5\xBE)il/ ); $output = $stylesheet->output_as_chars( $results ); ok( Encode::is_utf8($output) ); ok( $output, "\x{17E}il" ); $output = $stylesheet->output_as_bytes( $results ); ok( !Encode::is_utf8($output) ); ok( $output =~ /^(?:ž|\xC5\xBE)il/ ); } # doesn't map to latin-1 so will appear as an entity { my $style_doc = $parser->parse_string(< XSLT ok( $style_doc ); my $stylesheet = $xslt->parse_stylesheet($style_doc); ok( $stylesheet ); my $results = 
$stylesheet->transform($doc); ok( $results ); my $output = $stylesheet->output_string( $results ); ok( $output ); ok( !Encode::is_utf8($output) ); ok( $output, "žil" ); $output = $stylesheet->output_as_chars( $results ); ok( Encode::is_utf8($output) ); ok( $output, "\x{17E}il" ); $output = $stylesheet->output_as_bytes( $results ); ok( !Encode::is_utf8($output) ); ok( $output, "žil" ); }