package WWW::Translate::interNOSTRUM;

use strict;
use warnings;
use Carp qw(carp);
use WWW::Mechanize;
use Encode;

our $VERSION = '0.11';

# Supported translation directions, keyed by the code accepted by new()
# and from_into().
my %lang_pairs = (
    'ca-es' => 'Catalan -> Spanish',    # default
    'es-ca' => 'Spanish -> Catalan',
    'es-va' => 'Spanish -> Catalan with Valencian forms',
);

# Public output format names mapped to the values selected in the "tipus"
# field of the translation form.
my %output = (
    plain_text  => 'txtf',    # default
    marked_text => 'txt',
);

# Constructor parameters and their default values. Only these keys are
# copied into the object.
my %defaults = (
    lang_pair     => 'ca-es',
    output        => 'plain_text',
    store_unknown => 0,
);

# new(%overrides)
#
# Builds a translation engine object. Accepted parameters are the keys of
# %defaults:
#   lang_pair     - one of the keys of %lang_pairs
#   output        - one of the keys of %output
#   store_unknown - true to record unknown words found in marked_text output
#
# Unknown parameters and invalid lang_pair/output values produce a warning
# (via carp) and are ignored, so the default value is used instead.
# Returns the blessed object.
sub new {
    my $class = shift;

    my %overrides = @_;

    # Parameters whose values must belong to a fixed set.
    my %valid_values_for = (
        output    => \%output,
        lang_pair => \%lang_pairs,
    );

    # keys() returns a copy of the key list, so deleting entries inside
    # the loop is safe.
    foreach my $key (keys %overrides) {

        # check key; warn if illegal
        carp "Unknown parameter: $key\n" unless exists $defaults{$key};

        # check value; warn and delete if illegal
        my $allowed = $valid_values_for{$key} or next;
        next if exists $allowed->{ $overrides{$key} };

        carp _message($key, $overrides{$key});
        delete $overrides{$key};
    }

    # replace defaults with overrides
    my %args = (%defaults, %overrides);

    # keep only the known parameters
    my @fields = keys %defaults;
    my %this;
    @this{@fields} = @args{@fields};

    # Start with an empty hash reference; the previous "= ()" assigned undef.
    $this{unknown} = {} if $this{store_unknown};

    # autocheck is switched off because translate() inspects success()
    # itself and reports HTTP failures with carp instead of dying.
    $this{agent} = WWW::Mechanize->new(autocheck => 0);
    $this{agent}->env_proxy();

    $this{url} = 'http://www.internostrum.com/welcome.php';

    return bless(\%this, $class);
}

# translate($string)
#
# Submits $string to the interNOSTRUM web form using the object's current
# language pair and output format, and returns the translated text.
#
# Returns:
#   - the translated string on success;
#   - '' (with a warning) when called without an argument, and '' when the
#     argument is undefined or empty;
#   - undef (with a warning carrying the HTTP status line) when a request
#     fails;
#   - '' (with a warning) when no translation can be extracted from the
#     response page.
#
# When store_unknown is set and the output format is 'marked_text', every
# word preceded by '*' in the result is counted in
# $self->{unknown}{<source language>}{<word>}.
sub translate {
    my $self = shift;

    unless (@_) {
        carp "Nothing to translate\n";
        return '';
    }

    my $string = shift;
    return '' if !defined $string || $string eq '';

    $string = _fix_source($string);

    my $mech = $self->{agent};

    $mech->get($self->{url});
    unless ($mech->success) {
        carp $mech->response->status_line;
        return undef;
    }

    $mech->field("quadretext", $string);

    # 'es-va' is submitted as direction 'es-ca' plus the 'valen' checkbox.
    # Work on a local copy: overwriting $self->{lang_pair} here would make
    # every later call translate without Valencian forms.
    my $direction = $self->{lang_pair};
    if ($direction eq 'es-va') {
        $direction = 'es-ca';
        $mech->tick('valen', 1);
    }

    $mech->select("direccio", $direction);
    $mech->select("tipus", $output{ $self->{output} });
    $mech->click();
    unless ($mech->success) {
        carp $mech->response->status_line;
        return undef;
    }

    my $response = $mech->content();

    # The translation is taken from the text between the div ending in
    # "spelling." and the next </p>. The pattern requires a newline right
    # before the translated text.
    # NOTE(review): confirm this still matches the markup of the live page.
    my $translated;
    if ($response =~ /spelling\.<\/div>\s*\n(.+?)<\/p>/s) {
        $translated = $1;
    }
    else {
        carp "Didn't receive a translation from the interNostrum server.\n" .
             "Please check the length of the source text.\n";
        return '';
    }

    # remove double spaces
    $translated =~ s/(?<=\S)\s{2}(?=\S)/ /g;

    # store unknown words
    if ($self->{store_unknown} && $self->{output} eq 'marked_text') {

        if ($translated =~ /(?:^|\W)\*/) {

            # The source language is the first half of the pair code.
            my $source_lang = substr($self->{lang_pair}, 0, 2);

            # Decode before matching and encode each capture back;
            # presumably so that \w also matches accented letters.
            my $utf8 = decode('iso-8859-1', $translated);

            while ($utf8 =~ /(?:^|\W)\*(\w+?)\b/g) {
                my $detected = encode('iso-8859-1', $1);
                $self->{unknown}->{$source_lang}->{$detected}++;
            }
        }
    }

    return $translated;
}
# from_into([$pair])
#
# Combined getter/setter for the language pair.
# Without an argument, returns the current language pair.
# With an argument, stores it as the new language pair only when it is a
# key of %lang_pairs; any other value is silently ignored. In setter mode
# the result is the stored pair, or a false value when it was rejected.
sub from_into {
    my ($self, @args) = @_;

    # Getter: nothing was passed besides the object.
    return $self->{lang_pair} unless @args;

    # Setter: the assignment only happens for a supported pair.
    my $requested = $args[0];
    return exists($lang_pairs{$requested})
        && ($self->{lang_pair} = $requested);
}
# output_format([$format])
#
# Combined getter/setter for the output format.
# Without an argument, returns the current output format.
# With an argument, stores it as the new output format only when it is a
# key of %output; any other value is silently ignored. In setter mode the
# result is the stored format, or a false value when it was rejected.
sub output_format {
    my ($self, @args) = @_;

    # Getter: nothing was passed besides the object.
    return $self->{output} unless @args;

    # Setter: the assignment only happens for a supported format.
    my $requested = $args[0];
    return exists($output{$requested})
        && ($self->{output} = $requested);
}
# get_unknown($lang_code)
#
# Returns the hash reference holding the unknown words recorded for the
# given source language ('es' or 'ca'); keys are the words, values are the
# number of times each one was recorded. The result is undef when nothing
# has been recorded for that language yet.
#
# Warns (via carp) and returns nothing when:
#   - the object was not configured with store_unknown, or
#   - the language code is missing or is not 'es' or 'ca'.
sub get_unknown {
    my ($self, $lang_code) = @_;

    unless ($self->{store_unknown}) {
        carp "I'm not configured to store unknown words\n";
        return;
    }

    # A missing code is reported as an invalid code rather than as a
    # configuration problem. \A and \z reject a trailing newline.
    unless (defined $lang_code && $lang_code =~ /\A(?:es|ca)\z/) {
        carp "Invalid language code\n";
        return;
    }

    return $self->{unknown}->{$lang_code};
}
# _message($key, $value)
#
# Builds the two-line warning text used when a constructor parameter ($key)
# was given an invalid value ($value). Returns the text; it does not emit
# the warning itself.
sub _message {
    my ($key, $value) = @_;

    return join '',
        "Invalid value for parameter $key, $value.\n",
        "Will use the default value instead.\n";
}
# _fix_source($string)
#
# Normalizes the source text before it is submitted: every lowercase
# "l.l" sequence (geminated l typed with a full stop) is rewritten with the
# middle dot character, chr(183) = hex B7. Returns the modified copy; the
# caller's string is left untouched.
sub _fix_source {
    my $text = shift;

    $text =~ s{l\.l}{l\xB7l}g;

    return $text;
}
1;
__END__
=head1 NAME
WWW::Translate::interNOSTRUM - Catalan < > Spanish machine translation
=head1 VERSION
Version 0.11 September 20, 2007
=head1 SYNOPSIS
use WWW::Translate::interNOSTRUM;
my $engine = WWW::Translate::interNOSTRUM->new();
my $translated_string = $engine->translate($string);
# default language pair is Catalan -> Spanish
# change to Spanish -> Catalan:
$engine->from_into('es-ca');
# check current language pair:
my $current_langpair = $engine->from_into();
# default output format is 'plain_text'
# change to 'marked_text':
$engine->output_format('marked_text');
# check current output format:
my $current_format = $engine->output_format();
# configure a new interNOSTRUM object to store unknown words:
$engine = WWW::Translate::interNOSTRUM->new(
output => 'marked_text',
store_unknown => 1,
);
# get unknown words for source language = Spanish:
my $es_unknown_href = $engine->get_unknown('es');
=head1 DESCRIPTION
interNOSTRUM is a Catalan < > Spanish machine translation engine developed by
the Department of Software and Computing Systems of the University of Alicante
in Spain. This module provides an OO interface to the interNOSTRUM online
translation engine.
interNOSTRUM provides approximate translations of Catalan into Spanish and
Spanish into Catalan. It generates both the central variant of Oriental
Catalan (the standard variant used in Catalonia) and Valencian forms,
which follow the recommendations published in
L