#!perl -T

# Encoding tests for XML::Rules.
#
# The same Czech sentence is fed to the parser as XML declared in three
# different encodings (utf-8, windows-1250, ISO-8859-2), once as element
# content and once as an attribute value.  For each input the test checks
# that the value handed back by the parser is:
#   * the original character string when no `encode` option is given,
#   * the windows-1250 byte string when `encode => 'windows-1250'`,
#   * the ISO-8859-2 byte string when `encode => 'ISO-8859-2'`.
#
# To inspect a parsed structure while debugging, uncomment the matching
# `note Dumper(...)` line (note() keeps the output out of the TAP stream).

use strict;
use warnings;
use utf8;
use Test::More tests => 18;
use Data::Dumper;

use Encode qw(encode);

use XML::Rules;

my $data_utf8 = "P\x{159}\x{ed}li\x{17e} \x{17e}lu\x{b4}tou\x{10d}k\x{fd} k\x{fa}\x{148} \x{fa}p\x{11b}l \x{161}\x{ed}len\x{e9} \x{f3}dy.";
# In case you wonder ... The crazy looking stuff above is a Czech sentence
# commonly used to test the encodings. It contains all accented characters
# used in Czech and still kinda makes sense.
# It translates as "Too yellow horse moaned crazy odes." I did say "kinda" ;-)

# The same sentence as byte strings in the two legacy encodings under test.
my $data_windows = encode('windows-1250', $data_utf8);
my $data_latin2  = encode('ISO-8859-2',   $data_utf8);

# content
{
    # The root element has to be <data>: the assertions below read
    # $res->{data}.  Each document declares the encoding its payload is
    # actually in, so the parser can decode it.
    my $xml_utf8 = <<"*END*";
<?xml version="1.0" encoding="utf-8"?>
<data>$data_utf8</data>
*END*

    my $xml_windows = <<"*END*";
<?xml version="1.0" encoding="windows-1250"?>
<data>$data_windows</data>
*END*

    my $xml_latin2 = <<"*END*";
<?xml version="1.0" encoding="ISO-8859-2"?>
<data>$data_latin2</data>
*END*

    { #1-3: no `encode` option, expect the character string back
        my $parser = XML::Rules->new(
            rules => { _default => 'content' },
        );

        my $res_utf8 = $parser->parse($xml_utf8);
        # note Dumper($res_utf8);
        is($res_utf8->{data}, $data_utf8, "Parse XML in utf8");

        my $res_windows = $parser->parse($xml_windows);
        # note Dumper($res_windows);
        is($res_windows->{data}, $data_utf8, "Parse XML in windows-1250");

        my $res_latin2 = $parser->parse($xml_latin2);
        # note Dumper($res_latin2);
        is($res_latin2->{data}, $data_utf8, "Parse XML in latin2");
    }

    { #4-6: results requested in windows-1250
        my $parser = XML::Rules->new(
            rules  => { _default => 'content' },
            encode => 'windows-1250',
        );

        my $res_utf8 = $parser->parse($xml_utf8);
        # note Dumper($res_utf8);
        is($res_utf8->{data}, $data_windows, "Parse XML in utf8, return in windows-1250");

        my $res_windows = $parser->parse($xml_windows);
        # note Dumper($res_windows);
        is($res_windows->{data}, $data_windows, "Parse XML in windows-1250, return in windows-1250");

        my $res_latin2 = $parser->parse($xml_latin2);
        # note Dumper($res_latin2);
        is($res_latin2->{data}, $data_windows, "Parse XML in latin2, return in windows-1250");
    }

    { #7-9: results requested in ISO-8859-2
        my $parser = XML::Rules->new(
            rules  => { _default => 'content' },
            encode => 'ISO-8859-2',
        );

        my $res_utf8 = $parser->parse($xml_utf8);
        # note Dumper($res_utf8);
        is($res_utf8->{data}, $data_latin2, "Parse XML in utf8, return in ISO-8859-2");

        my $res_windows = $parser->parse($xml_windows);
        # note Dumper($res_windows);
        is($res_windows->{data}, $data_latin2, "Parse XML in windows-1250, return in ISO-8859-2");

        my $res_latin2 = $parser->parse($xml_latin2);
        # note Dumper($res_latin2);
        is($res_latin2->{data}, $data_latin2, "Parse XML in ISO-8859-2, return in ISO-8859-2");
    }
}

# attributes
{
    # Same payloads, this time carried in the `str` attribute of <data>;
    # the rule used below returns $_[1]->{str} under the key `data`.
    my $xml_utf8 = <<"*END*";
<?xml version="1.0" encoding="utf-8"?>
<data str="$data_utf8"/>
*END*

    my $xml_windows = <<"*END*";
<?xml version="1.0" encoding="windows-1250"?>
<data str="$data_windows"/>
*END*

    my $xml_latin2 = <<"*END*";
<?xml version="1.0" encoding="ISO-8859-2"?>
<data str="$data_latin2"/>
*END*

    { #10-12: no `encode` option, expect the character string back
        my $parser = XML::Rules->new(
            rules => { data => sub { data => $_[1]->{str} } },
        );

        my $res_utf8 = $parser->parse($xml_utf8);
        # note Dumper($res_utf8);
        is($res_utf8->{data}, $data_utf8, "Parse XML in utf8");

        my $res_windows = $parser->parse($xml_windows);
        # note Dumper($res_windows);
        is($res_windows->{data}, $data_utf8, "Parse XML in windows-1250");

        my $res_latin2 = $parser->parse($xml_latin2);
        # note Dumper($res_latin2);
        is($res_latin2->{data}, $data_utf8, "Parse XML in latin2");
    }

    { #13-15: results requested in windows-1250
        my $parser = XML::Rules->new(
            rules  => { data => sub { data => $_[1]->{str} } },
            encode => 'windows-1250',
        );

        my $res_utf8 = $parser->parse($xml_utf8);
        # note Dumper($res_utf8);
        is($res_utf8->{data}, $data_windows, "Parse XML in utf8, return in windows-1250");

        my $res_windows = $parser->parse($xml_windows);
        # note Dumper($res_windows);
        is($res_windows->{data}, $data_windows, "Parse XML in windows-1250, return in windows-1250");

        my $res_latin2 = $parser->parse($xml_latin2);
        # note Dumper($res_latin2);
        is($res_latin2->{data}, $data_windows, "Parse XML in latin2, return in windows-1250");
    }

    { #16-18: results requested in ISO-8859-2
        my $parser = XML::Rules->new(
            rules  => { data => sub { data => $_[1]->{str} } },
            encode => 'ISO-8859-2',
        );

        my $res_utf8 = $parser->parse($xml_utf8);
        # note Dumper($res_utf8);
        is($res_utf8->{data}, $data_latin2, "Parse XML in utf8, return in ISO-8859-2");

        my $res_windows = $parser->parse($xml_windows);
        # note Dumper($res_windows);
        is($res_windows->{data}, $data_latin2, "Parse XML in windows-1250, return in ISO-8859-2");

        my $res_latin2 = $parser->parse($xml_latin2);
        # note Dumper($res_latin2);
        is($res_latin2->{data}, $data_latin2, "Parse XML in ISO-8859-2, return in ISO-8859-2");
    }
}