# NOTE(review): the physical line below holds what looks like many collapsed
# source lines. Because it starts with '#', perl reads the whole line as one
# comment as this text stands: the `use` statements, `plan tests => 460`, the
# XML_DECL constant and the fixture lists only take effect once the original
# line breaks are restored.
#
# Contents, in order:
#   - RCS $Id$ header and a two-line statement of purpose (parsing tests for
#     XML::LibXML, relying on t/01basic.t).
#   - Imports: Test, IO::File, XML::LibXML, XML::LibXML::Common (:libxml),
#     XML::LibXML::SAX, XML::LibXML::SAX::Builder; plan of 460 tests.
#   - Constant XML_DECL, defined here as "\n".
#   - @goodWFStrings, @goodWFNSStrings, @goodWFDTDStrings: presumably inputs
#     that must parse (plain / namespaced / with DTD) -- inferred from the
#     names only; the code that consumes them is not visible. TODO confirm.
#   - @badWFStrings: inputs annotated inline as malformed (empty document,
#     tag mismatch, trailing junk, bad encoding, ...). The list is still open
#     at the end of this line; its closing ');' is on the next line.
#
# NOTE(review): most literals are empty ('' or "") although the inline
# annotations describe markup ("comment only", "single tag", "bad attribute"),
# so the XML content appears to have been stripped. At least two literals have
# unbalanced quotes ('<>&"'' and '&"). Restore the fixtures from a pristine
# copy before trusting any result of this test.
# $Id: 02parse.t,v 1.15 2002/10/14 20:15:37 phish Exp $ ## # this test checks the parsing capabilities of XML::LibXML # it relies on the success of t/01basic.t use Test; use IO::File; BEGIN { plan tests => 460 }; use XML::LibXML; use XML::LibXML::Common qw(:libxml); use XML::LibXML::SAX; use XML::LibXML::SAX::Builder; use constant XML_DECL => "\n"; ## # test values my @goodWFStrings = ( '', '', XML_DECL . "", ''."\n", ''."\n", XML_DECL. " \n", XML_DECL. ' ', XML_DECL. ' ', XML_DECL. '&"\']]>', XML_DECL. '<>&"'', XML_DECL. '  ', XML_DECL. 'foo', XML_DECL. 'foo', XML_DECL. 'foo', XML_DECL. '', XML_DECL. '', XML_DECL. '', ); my @goodWFNSStrings = ( XML_DECL. ''."\n", XML_DECL. ''."\n", XML_DECL. ''."\n", XML_DECL. ''."\n", XML_DECL. ''."\n", ); my @goodWFDTDStrings = ( XML_DECL. ''."\n".']>'."\n".'&foo;', XML_DECL. ']>&foo;', XML_DECL. ']>&foo;>', XML_DECL. ']>&foo;>', XML_DECL. ']>&foo;>', XML_DECL. ']>', XML_DECL. ']>', ); my @badWFStrings = ( "", # totally empty document XML_DECL, # only XML Declaration "", # comment only is like an empty document ']>', # no good either ... "", # single tag (tag mismatch) "foo", # trailing junk "foo", # leading junk "", # bad attribute '&", # bad char "�x20;", # bad char "", # bad encoding "&foo;", # undefind entity ">", # unterminated entity XML_DECL. ']>', # bad placed entity XML_DECL. ']>', # even worse "", # bad comment '', # bad either... (is this conform with the spec????) 
# NOTE(review): collapsed physical line (see the layout of the line itself:
# many statements, inline '#' annotations in the middle). Any inline '#'
# annotation that perl reaches outside a string (e.g. "# undefined entity")
# turns the remainder of this physical line into a comment.
#
# Contents, in order:
#   - ');' closing the list left open on the previous line.
#   - %goodPushWF: case name => array ref of string pieces. Presumably the
#     pieces are pushed to the parser one at a time; the consuming loop is not
#     visible here. TODO confirm.
#   - wellformedN / wellbalanceN key-value pairs closed by ');'. The loops
#     further down read them as $chunks{'wellformed'.$_} and
#     $chunks{'wellbalance'.$_}.
#   - @badWBStrings: chunks that parse_xml_chunk must reject (checked with
#     eval + ok($@) on the next line).
#   - "5.1 DOM CHUNK PARSER": for 1..$MAX_WF_C, parse the wellformed chunk and
#     assert a true result. If the result is a document fragment with child
#     nodes whose first and last child are the same node, compare its toString
#     with the source chunk; otherwise record ok(0). The wellbalance loop,
#     which starts here and finishes on the next line, does the same without
#     the first/last-child check.
#
# NOTE(review): the key 'multiple6' occurs twice in %goodPushWF. In a Perl
# hash the later pair replaces the earlier one, so the first 'multiple6' case
# can never be exercised.
# NOTE(review): the text jumps from the unterminated `comment5 => [ 'fo','o',`
# straight to `wellformed7 => ''`. The end of %goodPushWF, the opening of the
# chunk hash (wellformed1..6 included) and the definitions of %chunks,
# $MAX_WF_C and $MAX_WB_C are not present in the visible text.
# NOTE(review): `$_--` inside the loops decrements the loop variable, so the
# following ok() compares against the entry with the previous index. The
# triggering pattern /\\<\/A\>/ matches a literal backslash followed by
# "</A>"; it looks damaged -- verify against a pristine copy.
); my %goodPushWF = ( single1 => [''], single2 => ['',''], single3 => [ XML_DECL, "", "" ], single4 => [""], single5 => ["<", "foo","bar", "/>"], single6 => ['',"\n"], single7 => ['',"\n"], single8 => [''], single9 => ['',"\n"], multiple1 => [ '','',' ', ], multiple2 => [ '<','/foobar> ', ], multiple3 => [ '','&"\']]>',''], multiple4 => [ '','&', ']]>', '' ], multiple5 => [ '','&', ']]>', '' ], multiple6 => ['','<>&"'',''], multiple6 => ['','<',';&','gt;&a','mp;','"&ap','os;',''], multiple7 => [ '', '  ','' ], multiple8 => [ '', '&#x','20;','60;','' ], multiple9 => [ '','moo','moo',' ', ], multiple10 => [ '','moo',' ', ], comment1 => [ '','' ], comment2 => [ '','' ], comment3 => [ '','' ], comment4 => [ '','' ], comment5 => [ 'fo','o', wellformed7 => '', wellformed8 => '', wellformed9 => 'D', wellformed10 => '', wellbalance1 => '', wellbalance2 => '', wellbalance3 => '', wellbalance4 => 'DI', wellbalance5 => '', wellbalance6 => '', wellbalance7 => '', wellbalance8 => 'DD', wellbalance9 => 'D', wellbalance10=> 'DD', wellbalance11=> 'D', wellbalance12=> 'D', wellbalance13=> 'D', wellbalance14=> '', wellbalance15=> '', wellbalance16=> 'D', ); my @badWBStrings = ( "", "", "bar", "bar", "&foo;", # undefined entity "&", # bad char "häh?", # bad encoding "", # bad stays bad ;) "", # bad stays bad ;) ); my $parser = XML::LibXML->new; print "# 5.1 DOM CHUNK PARSER\n"; for ( 1..$MAX_WF_C ) { my $frag = $parser->parse_xml_chunk($chunks{'wellformed'.$_}); ok($frag); if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE && $frag->hasChildNodes ) { if ( $frag->firstChild->isSameNode( $frag->lastChild ) ) { if ( $chunks{'wellformed'.$_} =~ /\\<\/A\>/ ) { $_--; } ok($frag->toString,$chunks{'wellformed'.$_}); next; } } ok(0); } for ( 1..$MAX_WB_C ) { my $frag = $parser->parse_xml_chunk($chunks{'wellbalance'.$_}); ok($frag); if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE && $frag->hasChildNodes ) { if ( $chunks{'wellbalance'.$_} =~ /<\/A>/ ) { $_--; } 
# NOTE(review): collapsed physical line; it begins in the middle of the
# wellbalance loop of section 5.1 opened on the previous line.
#
# Contents, in order:
#   - Tail of the 5.1 wellbalance loop: ok() on the fragment's toString versus
#     the source chunk, or ok(0) when the fragment check failed.
#   - parse_xml_chunk(undef) and parse_xml_chunk("") are each run under eval
#     and must leave $@ set; the same is asserted for every @badWBStrings
#     entry.
#   - "5.2 SAX CHUNK PARSER": installs an XML::LibXML::SAX::Builder through
#     set_handler, then repeats the two loops of section 5.1 unchanged.
#   - A lone '}' closing a block whose opening '{' is not in the visible text.
#   - "6 VALIDATING PARSER": a fresh parser with validation(1) must die in
#     parse_string on $badstrings{SIMPLE}. `my $ql;` is declared and never
#     used.
#   - sub tsub($doc): creates a new document, gives it a root element X:foo
#     with a child X:bar (both via createElementNS("x", ...)), appends an
#     importNode() copy of $doc's document element under X:bar, and returns
#     the new document.
#   - sub tsub2($doc, $aref): takes the first element of the array ref as the
#     query and returns an array ref of $doc->findnodes() results for that
#     query after encodeToUTF8('iso-8859-1', ...).
#
# NOTE(review): neither tsub nor tsub2 is called anywhere in the visible text.
# NOTE(review): in this collapsed form the '#' in front of the old
# `return [ $doc->findnodes($query) ];` in tsub2 also comments out the live
# return statement and the sub's closing brace.
ok($frag->toString,$chunks{'wellbalance'.$_}); next; } ok(0); } eval { my $fail = $parser->parse_xml_chunk(undef); }; ok($@); eval { my $fail = $parser->parse_xml_chunk(""); }; ok($@); foreach my $str ( @badWBStrings ) { eval { my $fail = $parser->parse_xml_chunk($str); }; ok($@); } print "# 5.2 SAX CHUNK PARSER\n"; my $handler = XML::LibXML::SAX::Builder->new(); $parser->set_handler( $handler ); for ( 1..$MAX_WF_C ) { my $frag = $parser->parse_xml_chunk($chunks{'wellformed'.$_}); ok($frag); if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE && $frag->hasChildNodes ) { if ( $frag->firstChild->isSameNode( $frag->lastChild ) ) { if ( $chunks{'wellformed'.$_} =~ /\\<\/A\>/ ) { $_--; } ok($frag->toString,$chunks{'wellformed'.$_}); next; } } ok(0); } for ( 1..$MAX_WB_C ) { my $frag = $parser->parse_xml_chunk($chunks{'wellbalance'.$_}); ok($frag); if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE && $frag->hasChildNodes ) { if ( $chunks{'wellbalance'.$_} =~ /<\/A>/ ) { $_--; } ok($frag->toString,$chunks{'wellbalance'.$_}); next; } ok(0); } } { print "# 6 VALIDATING PARSER\n"; my %badstrings = ( SIMPLE => ''."\n\n", ); my $parser = XML::LibXML->new; $parser->validation(1); my $doc; eval { $doc = $parser->parse_string($badstrings{SIMPLE}); }; ok( $@ ); my $ql; } sub tsub { my $doc = shift; my $th = {}; $th->{d} = XML::LibXML::Document->createDocument; my $e1 = $th->{d}->createElementNS("x","X:foo"); $th->{d}->setDocumentElement( $e1 ); my $e2 = $th->{d}->createElementNS( "x","X:bar" ); $e1->appendChild( $e2 ); $e2->appendChild( $th->{d}->importNode( $doc->documentElement() ) ); return $th->{d}; } sub tsub2 { my ($doc,$query)=($_[0],@{$_[1]}); # return [ $doc->findnodes($query) ]; return [ $doc->findnodes(encodeToUTF8('iso-8859-1',$query)) ]; }