#!perl -Tw
# Testing of the incremental parsing. Try to split an HTML document at
# every possible position and make sure that the result is the same as
# when parsing everything in one chunk.
# A short document is used, because we don't have all day to spend on
# this.
# The test document and the planned number of tests are set up inside a
# BEGIN block so that both already have their values when the plan is
# declared at compile time by the "use Test::More" line.
my $HTML;
my $notests;

BEGIN {
    $HTML = <<'EOT';
Tittel
Overskrift
Text bold
italic
some entities (å)
EOT

    # Three tests are run for every character position of the document,
    # plus four tests that are run outside the per-position loop.
    $notests = 3 * length($HTML) + 4;
}
use strict;
use Test::More tests=>$notests; # Tests
# Make sure the module under test can be loaded at all.
BEGIN {
    use_ok('HTML::TreeBuilder');
}

# Parse the whole document in one chunk and keep its serialisation in
# $html; the incremental parses further down are compared against it.
# The constructor is called as a class method instead of using the
# discouraged indirect object syntax ("new HTML::TreeBuilder").
my $h = HTML::TreeBuilder->new;
isa_ok($h, 'HTML::TreeBuilder');
$h->parse($HTML)->eof;
my $html = $h->as_HTML;
$h->delete;
# Each iteration parses the document split into two chunks at $pos and
# checks that the resulting tree serialises exactly like the one-chunk
# parse. Exactly three tests are emitted per iteration, whether or not
# the parser dies, so the number of tests run always matches the plan.
for my $pos (0 .. length($HTML) - 1) {
    my $first = substr($HTML, 0, $pos);
    my $last  = substr($HTML, $pos);
    is($first . $last, $HTML, "File split okay");

    # Up to ten characters on either side of the split point, shown in
    # the diagnostics when something goes wrong.
    my $before  = $pos < 10 ? $pos : 10;
    my $context = join '',
        "«", substr($HTML, $pos - $before, $before), "»\n",
        "«", substr($HTML, $pos, 10), "»\n";

    my $h1;
    my $parsed = eval {
        $h1 = HTML::TreeBuilder->new;
        $h1->parse($first);
        $h1->parse($last);
        $h1->eof;
        1;    # true marker: the block ran to completion
    };
    # Capture the error right away, before anything else can reset $@.
    my $error = $@ || 'unknown error';

    # Checked outside the eval so this test is emitted on every pass.
    isa_ok($h1, 'HTML::TreeBuilder');

    if (!$parsed) {
        # Report through Test::More rather than printing a raw "not ok"
        # line, which would carry the wrong test number and would not be
        # counted against the plan.
        fail("Still Parsing as the same after split at $pos");
        diag("Died when splitting at position $pos: $error\n", $context);
    }
    else {
        my $new_html = $h1->as_HTML;
        is($new_html, $html, "Still Parsing as the same after split at $pos")
            or diag("Something is different when splitting at position $pos:\n",
                    $context, "\n$html$new_html\n");
    }

    # $h1 is still undefined if the constructor itself died.
    $h1->delete if defined $h1;
} # for
# Also try what happens when the document is fed to the parser one
# character at a time; the result must match the one-chunk parse too.
# The constructor is called as a class method instead of using the
# discouraged indirect object syntax ("new HTML::TreeBuilder").
my $perChar = HTML::TreeBuilder->new;
isa_ok($perChar, 'HTML::TreeBuilder');
for my $char (split //, $HTML) {
    $perChar->parse($char);
}
$perChar->eof;
my $new_html = $perChar->as_HTML;
is($new_html, $html, "Testing per Char parsing");
$perChar->delete;