# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl test.pl'
#########################
BEGIN {
$| = 1;
$GENERATE_TEST_FILES = $ENV{GENERATE_TEST_FILES};
@test_files = ("test1.html", "test2.html", "test3.html");
$number_of_tests_per_file = $GENERATE_TEST_FILES ? 0 : 8;
my $number_of_tests =
$number_of_tests_per_file * scalar(@test_files) * 2 + 1;
print "1..$number_of_tests\n";
}
END {print "not ok 1\n" unless $loaded;}
use Regexp::IgnoreHTML;
$loaded = 1;
print "ok 1\n";
chdir("t");
for (my $k = 0; $k <= 1; $k++) {
for (my $i = 0; $i < scalar(@test_files); $i++) {
my $j = ($i + $k * scalar(@test_files)) * $number_of_tests_per_file;
my $test_file = $test_files[$i];
my $test_file_cleaned = "html_cleaned_".$i."_".$k.".html";
my $test_file_delimited = "html_delimited_".$i."_".$k.".html";
my $test_file_merged = "html_merged_".$i."_".$k.".html";
my $test_file_counters = "html_counters_".$i."_".$k;
# get the original text
open(FILE, $test_file) || die("cannot open to read $test_file");
my $original_text;
read(FILE, $original_text, -s $test_file);
close(FILE);
# create the Regexp::IgnoreHTML object (that implements
# the get_tokens method)
my $rei = new Regexp::IgnoreHTML($original_text,
"<__INDEX__>");
if ($k == 0) { # stay with the defaults
$rei->space_after_non_text_characteristics_html(0);
}
elsif ($k == 1) {
$rei->space_after_non_text_characteristics_html(1);
}
unless ($GENERATE_TEST_FILES) {
print_ok($rei, 2 + $j, "Regexp::IgnoreHTML object was created");
}
# split
$rei->split();
if ($GENERATE_TEST_FILES) {
open(FILE, ">".$test_file_cleaned)
|| die("cannot open to write $test_file_cleaned");;
print FILE $rei->cleaned_text();
close(FILE);
}
else {
my $saved_results;
open(FILE, $test_file_cleaned)
|| die("cannot open to read $test_file_cleaned");
read(FILE, $saved_results, -s $test_file_cleaned);
close(FILE);
print_ok(($saved_results eq $rei->cleaned_text()), 3 + $j,
"Generated cleaned text");
}
if ($GENERATE_TEST_FILES) {
open(FILE, ">".$test_file_delimited)
|| die("cannot open to write $test_file_delimited");;
print FILE $rei->delimited_text();
close(FILE);
}
else {
my $saved_results;
open(FILE, $test_file_delimited)
|| die("cannot open to read $test_file_delimited");
read(FILE, $saved_results, -s $test_file_delimited);
close(FILE);
print_ok(($saved_results eq $rei->delimited_text()), 4 + $j,
"Generated delimited text");
}
my $counter1 =
$rei->s('(bla)_(\d+)','$2_$1','gi');
my $counter2 = $rei->s('(\d+)','$1','');
my $counter3 = $rei->s('general','GENERAL','gi');
my $counter4 = $rei->s('bla','blaaaa', 'i');
if ($GENERATE_TEST_FILES) {
open(FILE, ">".$test_file_counters)
|| die("cannot open to write $test_file_counters");
print FILE $counter1."\n".$counter2."\n".
$counter3."\n".$counter4."\n";
close(FILE);
}
else {
open(FILE, $test_file_counters)
|| die("cannot open to read $test_file_counters");
my ($saved_counter1, $saved_counter2,
$saved_counter3, $saved_counter4) = ;
chomp($saved_counter1, $saved_counter2,
$saved_counter3, $saved_counter4);
close(FILE);
print_ok($counter1 == $saved_counter1, 5 + $j,
"Substitution action");
print_ok($counter2 == $saved_counter2, 6 + $j,
"Second substitution action");
print_ok($counter3 == $saved_counter3, 7 + $j,
"Third substitution action");
print_ok($counter4 == $saved_counter4, 8 + $j,
"Third substitution action");
}
$rei->translation_position_factor(0);
my $cleaned_text = $rei->cleaned_text();
my $after_the_matach;
my $buffer = "";
my $last_position = 0;
# for each word
while ($cleaned_text =~ /Rani[\s\n]+Pinchuk/g) {
my $match = $&;
my $end_match_position = pos($cleaned_text) - 1;
my $match_length = length($match);
my $start_match_position = $end_match_position - $match_length + 1;
my $replacer = 'Rani Pinchuk';
$rei->replace(\$buffer,
\$last_position,
$start_match_position,
$end_match_position,
$replacer);
}
$buffer .= substr($rei->cleaned_text(), $last_position);
$rei->cleaned_text($buffer);
$rei->merge();
if ($GENERATE_TEST_FILES) {
open(FILE, ">".$test_file_merged)
|| die("cannot open to write $test_file_merged");
print FILE $rei->text();
close(FILE);
}
else {
my $saved_results;
open(FILE, $test_file_merged)
|| die("cannot open to read $test_file_merged");
read(FILE, $saved_results, -s $test_file_merged);
close(FILE);
print_ok(($saved_results eq $rei->text()), 9 + $j,
"Merged to get back the text");
}
}
}
#############################################
# print_ok ($expression, $number, $comment)
#############################################
sub print_ok {
my $expression = shift || 0;
my $number = shift;
my $string = shift || "";
$string = "ok " . $number . " " . $string . "\n";
if (! $expression) {
$string = "not " . $string;
}
print $string;
} # print_ok