# -*-Perl-*-
# Time-stamp: "2003-09-15 01:48:48 ADT"
#
# Testing of the incremental parsing.  Try to split an HTML document at
# every possible position and make sure that the result is the same as
# when parsing everything in one chunk.
#
# The document is deliberately short, because the number of tests grows
# with its length: one test per split position plus one final test that
# feeds the document to the parser a single character at a time.
#
# Output is hand-written TAP: a "1..N" plan, "ok N" / "not ok N" result
# lines, and "#"-prefixed diagnostic lines.

use strict;
use warnings;

# Some package variables below (e.g. $MacPerl::Version) are mentioned only
# once in this file; silence the "used only once" warning for them.
no warnings 'once';

use HTML::TreeBuilder;

# NOTE(review): in this copy the fixture contains no tags or entity
# references, although its text mentions bold/italic/entities -- confirm
# that the markup has not been stripped from the document.
my $HTML = <<'EOT';
Tittel

Overskrift

Text bold italic some entities (å)
EOT

$| = 1;    # unbuffered output, so results appear as they are produced

# length($HTML) - 1 split positions, plus the char-at-a-time test.
my $notests = length($HTML);
print "1..$notests\n";

# Environment report, emitted as TAP comments.
print "#Using HTML::TreeBuilder version v$HTML::TreeBuilder::VERSION\n";
print "#Using HTML::Element version v$HTML::Element::VERSION\n";
print "#Using HTML::Parser version v", $HTML::Parser::VERSION || "?", "\n";
print "#Using HTML::Entities version v", $HTML::Entities::VERSION || "?", "\n";
print "#Using HTML::Tagset version v", $HTML::Tagset::VERSION || "?", "\n";
print "# Running under perl version $] for $^O",
    (chr(65) eq 'A') ? "\n" : " in a non-ASCII world\n";
print "# Win32::BuildNumber ", Win32::BuildNumber(), "\n"
    if defined(&Win32::BuildNumber) and defined Win32::BuildNumber();
print "# MacPerl verison $MacPerl::Version\n"
    if defined $MacPerl::Version;
printf "# Current time local: %s\n# Current time GMT: %s\n",
    scalar(localtime($^T)), scalar(gmtime($^T));

# Reference result: the whole document parsed in one chunk.
my $html;
{
    my $reference_tree = HTML::TreeBuilder->new;
    $reference_tree->parse($HTML)->eof;
    $html = $reference_tree->as_HTML;
    $reference_tree->delete;

    my $commented = $html;
    $commented =~ s/^/# /mg;
    print "# Parsing: $commented#\n";
}

# Each test here tries to parse the doc when we split it in two.
for my $pos (1 .. length($HTML) - 1) {
    my $first = substr($HTML, 0, $pos);
    my $last  = substr($HTML, $pos);
    die "This is bad" unless $HTML eq ($first . $last);

    # Declared outside the eval so the tree can be cleaned up afterwards;
    # it stays undefined if the constructor itself dies.
    my $tree;
    my $survived = eval {
        $tree = HTML::TreeBuilder->new;
        $tree->parse($first);
        $tree->parse($last);
        $tree->eof;
        1;
    };
    if (!$survived) {
        my $error = $@;    # copy at once; $@ is easily clobbered
        print tap_comment(
            "Died when splitting at position $pos:\n"
            . split_context($HTML, $pos)
            . "Error: $error"
        );
        print "not ok $pos\n";
        $tree->delete if $tree;
        next;
    }

    my $new_html = $tree->as_HTML;
    if ($new_html ne $html) {
        print tap_comment(
            "Something is different when splitting at position $pos:\n"
            . split_context($HTML, $pos)
            . "\n$html$new_html\n"
        );
        # The newline matters: without it the next result line would be
        # appended to this one.
        print "not ok $pos\n";
    }
    else {
        print "ok $pos\n";
    }
    $tree->delete;
}

# Also try what happens when we feed the document one char at a time.
print "#\n#\n# Now parsing document once char at a time...\n";
my $bad = 0;
{
    my $tree = HTML::TreeBuilder->new;
    for my $char (split //, $HTML) {
        $tree->parse($char);
    }
    $tree->eof;

    my $new_html = $tree->as_HTML;
    if ($new_html ne $html) {
        print tap_comment(
            "Also different when parsed one char at a time\n"
            . "\n$html$new_html\n"
        );
        $bad = 1;
    }
    $tree->delete;
}
my $verdict = $bad ? "not ok" : "ok";
print "$verdict $notests\n";

# Turn arbitrary (possibly multi-line) text into TAP comment lines:
# every line gets a "# " prefix and the result always ends in a newline.
sub tap_comment {
    my ($text) = @_;
    $text =~ s/^/# /mg;
    $text .= "\n" unless $text =~ /\n\z/;
    return $text;
}

# Describe a split position: up to 10 characters before $pos and up to
# 10 characters from $pos onwards, each wrapped in guillemets on its own
# line.
sub split_context {
    my ($doc, $pos) = @_;
    my $before = $pos < 10 ? $pos : 10;
    return "«" . substr($doc, $pos - $before, $before) . "»\n"
         . "«" . substr($doc, $pos, 10) . "»\n";
}