use v6-alpha;

# Tests for HTML::Entities: decoding entities, encoding characters as named
# or numeric entities, in-place (void context) operation, restricting the
# set of characters to encode, and decoding several strings in one call.
#
# The entity-escaped literals below must stay escaped in the source file.
# If a tool decodes them, the input and expected strings become identical
# and the assertions no longer verify anything.

use Test;

plan 11;

use HTML::Entities; pass "(dummy instead of broken use_ok)";

# Round trip: escaped input -> decoded text -> named entities -> numeric
# entities. The input mixes named (&aring;) and decimal (&#230;) forms.
my $a = "V&aring;re norske tegn b&oslash;r &#230;res";

$a = decode_entities($a);
is $a, "Våre norske tegn bør æres", 'Decoding entities should work';

$a = encode_entities($a);
is $a, "V&aring;re norske tegn b&oslash;r &aelig;res",
    '... and encoding entities should work';

$a = decode_entities($a);
$a = encode_entities_numeric($a);
is $a, "V&#xE5;re norske tegn b&#xF8;r &#xE6;res",
    '... and encode_entities_numeric should also work.';

# The markup-significant characters must always be escaped.
$a = "<&>";
is encode_entities($a), "&lt;&amp;&gt;",
    'We should be able to encode basic HTML entities';

$a = "<&>";
is encode_entities_numeric($a), "&#x3C;&#x26;&#x3E;",
    '... or encode them numerically, if desired.';

# Void context: the argument itself is expected to be modified in place.
$a = "V&aring;re norske tegn b&oslash;r &#230;res";
decode_entities($a);
is($a, "Våre norske tegn bør æres",
    'Decoding entities should work in void context');

encode_entities($a);
is($a, "V&aring;re norske tegn b&oslash;r &aelig;res",
    '... and encoding entities should also work in void context');

# The second argument restricts encoding to the given character range;
# characters without a named entity come back as decimal references.
$a = "abcdef";
is encode_entities($a, 'a-c'), "&#97;&#98;&#99;def",
    'We should be able to include the range of characters to encode.';

# Decoding several strings at once. $a is still the plain "abcdef" here,
# because the call above was not made in void context.
my $b = "&lt;&amp;&gt;";
is(decode_entities([$a, $b]), [ 'abcdef', '<&>' ],
    "Decoding an array ref should work.");
is(decode_entities($a, $b), [ 'abcdef', '<&>' ],
    "Decoding a list should work too.");

# Everything from here to the closing =cut is disabled legacy Perl 5 test
# code kept for reference only. It is not executed, and its DATA-reading
# loop is visibly incomplete (unterminated regex and brace).

=head

# See how well it does against rfc1866...

$ent = $plain = "";
while () {
    next unless /^\s*>>>$ent\n>>>>$plain\n";

$a = $ent;
decode_entities($a);

print "DDD>$a\n";

print "not " if $a ne $plain;
print "ok 7\n";

# Try decoding when the ";" are left out
$a = $ent,
$a =~ s/;//g;
decode_entities($a);
print ";;;>$a\n";
print "not " if $a ne $plain;
print "ok 8\n";

$a = $plain;
encode_entities($a);
print "EEE>$a\n";
print "not " if $a ne $ent;
print "ok 9\n";

# From: Bill Simpson-Young
# Subject: HTML entities problem with 5.11
# To: libwww-perl@ics.uci.edu
# Date: Fri, 05 Sep 1997 16:56:55 +1000
# Message-Id: <199709050657.QAA10089@snowy.nsw.cmis.CSIRO.AU>
#
# Hi.
# I've got a problem that has surfaced with the changes to
# HTML::Entities.pm for 5.11 (it doesn't happen with 5.08). It's happening
# in the process of encoding then decoding special entities. Eg, what goes
# in as "abc&def&ghi" comes out as "abc&def;&ghi;".

print "not " unless decode_entities("abc&def&ghi&abc;&def;") eq "abc&def&ghi&abc;&def;";
print "ok 10\n";

# Decoding of &apos;
print "not " unless decode_entities("&apos;") eq "'" && encode_entities("'", "'") eq "&#39;";
print "ok 11\n";

# Quoted from rfc1866.txt

14. Proposed Entities

The HTML DTD references the "Added Latin 1" entity set, which only
supplies named entities for a subset of the non-ASCII characters in
[ISO-8859-1], namely the accented characters. The following entities
should be supported so that all ISO 8859-1 characters may only be
referenced symbolically. The names for these entities are taken from
the appendixes of [SGML].

Berners-Lee & Connolly Standards Track [Page 75]

RFC 1866 Hypertext Markup Language - 2.0 November 1995

Berners-Lee & Connolly Standards Track [Page 76]

RFC 1866 Hypertext Markup Language - 2.0 November 1995

=cut