#!/usr/bin/perl =head1 NAME gram-ga.pl - Check the grammar of Irish language OpenOffice documents =head1 SYNOPSIS B I I =head1 DESCRIPTION This script checks the grammar of the Irish language OpenOffice document given as the first argument, and creates a new document with the errors highlighted and annotated with appropriate messages. =head1 REQUIRES Perl 5.8, Lingua::GA::Gramadoir =head1 SEE ALSO =over 4 =item * L =item * L =item * L =back =head1 AUTHOR Kevin P. Scannell, Escannell@slu.eduE. =head1 COPYRIGHT AND LICENSE Copyright (C) 2004 Kevin P. Scannell This is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.2 or, at your option, any later version of Perl 5 you may have available. =cut use strict; use warnings; use utf8; use Archive::Zip qw( :ERROR_CODES); use Lingua::GA::Gramadoir; use Encode qw(decode encode); my $debug = 0; if (scalar @ARGV != 2) { print "Usage: $0 filetocheck[.sxw] errorfile[.sxw]\n"; exit 1; } my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime time; my $datestr = sprintf("%04d-%02d-%02d", $year+1900, $mon+1, $mday); # style definition for preamble # double-wavy red underline my $style = ''; # wavy green underline # my $style = ''; # yellow highlighter # my $style = ''; # annotation markup my $ann = ""; my $closeann = ''; # markup to insert before each error my $markup = ''; my $closemarkup = $markup; $closemarkup =~ s/[ >].*/>/; $closemarkup =~ s/new(); die "error reading $filetocheck" unless $zip->read($filetocheck) == AZ_OK; my $xml = $zip->contents('content.xml'); $xml =~ s/"/\\"/g; $xml =~ s/</\\/g; $xml =~ s/'/'/g; $xml =~ s/&/\\&/g; print STDERR "Unpacked contents.xml, converted char entities...\n" if ($debug); my $gr = new Lingua::GA::Gramadoir( # fix_spelling => 1, interface_language => 'ga', input_encoding => 'utf-8', ); print STDERR "Gramadoir object created...\n" if ($debug); my $errs = $gr->grammatical_errors($xml); print STDERR "Grammatical errors found...\n" if ($debug); $xml = decode("utf-8", $xml); my @xmllines = split /\n/, $xml; my $xmlans; my $curr_y = 1; my $curr_x = 0; my ($f_y, $f_x, $t_y, $t_x, $errmsg); foreach (@$errs) { ($f_y, $f_x, $t_y, $t_x, $errmsg) = m/^$/; while ($curr_y < $f_y) { $xmlans .= substr($xmllines[$curr_y-1], $curr_x)."\n"; $curr_y++; $curr_x = 0; } $xmlans .= substr($xmllines[$f_y - 1], $curr_x, $f_x - $curr_x); $curr_x = $f_x; my $errorspan=''; while ($curr_y < $t_y) { $errorspan .= substr($xmllines[$curr_y-1], $curr_x)."\n"; $curr_y++; $curr_x = 0; } $t_x++; # first char after error $errorspan .= substr($xmllines[$t_y - 1], $curr_x, $t_x - $curr_x); $errorspan =~ s/((\s*<[^>]+>\s*)+)/$closemarkup$1$markup/g; $errorspan =~ s/^/$markup/; $errorspan =~ s/$/$closemarkup/; $curr_x = $t_x; $xmlans .= $errorspan; $xmlans .= $ann; $errmsg =~ s/\//\\"/g; $xmlans .= $errmsg; $xmlans .= $closeann; } print STDERR "All error markup inserted...\n" if ($debug); $xmlans .= substr($xmllines[$curr_y - 1], $t_x); $curr_y++; while ($curr_y <= @xmllines) { $xmlans .= $xmllines[$curr_y-1]."\n"; $curr_y++; } print STDERR "New XML completed...\n" if ($debug); $xmlans = encode("utf-8", $xmlans); $xmlans =~ s/\\&/&/g; $xmlans =~ s/\\"/"/g; $xmlans =~ s/\\/>/g; $xmlans =~ s/'/'/g; print STDERR "New XML converted to utf-8 octets...\n" if ($debug); # insert description of gramadoir style in preamble $xmlans =~ s/(?<=)/$style/; $xmlans =~ s/(?<=/>$style<\/office:automatic-styles>/; $zip->contents('content.xml', $xmlans); print STDERR "New XML replaces old in contents.xml...\n" if ($debug); die "could not write to $errorfile" unless ($zip->writeToFileNamed( $errorfile ) == AZ_OK); print STDERR "Zip file written...\n" if ($debug); exit 0;