#!/usr/bin/perl -w # Copyright (c) 2002, Sam Vilain. All rights reserved. This program # is free software; you may use it under the same terms as Perl # itself. package Lingua::Translate::SysTran; use strict; use Carp; # package globals: # %config is default values to use for new objects # %servers is a hash from a translation pair to a hostname/port number # %one_letter_codes is actually a constant, it is for default port # number calculation use vars qw($VERSION %config %servers %one_letter_codes); # WARNING: Some constants have their default values extracted from the # POD. See the Pod::Constants man page. =head1 NAME Lingua::Translate::SysTrans - Translation back-end for SysTran's enterprise translation server, version 0.01 (European languages only) =head1 SYNOPSIS use Lingua::Translate; Lingua::Translate::config ( back_end => "SysTran", host => "babelfish.mydomainname.com", ); my $xl8r = Lingua::Translate->new(src => "de", dest => "en"); # prints "My hovercraft is full of eels" print $xl8r->translate("Mein Luftkissenfahrzeug ist voll von den Aalen"); =head1 DESCRIPTION Lingua::Translate::SysTran is a translation back-end for Lingua::Translate that contacts a SysTran translation server to do the real work. You should try to avoid putting the config() command that sets the location of the server in all of your scripts; make a little configuration module or put it in a script you can `require'. =head1 CONSTRUCTOR =head2 new(src => $lang, dest => lang, option => $value) Creates a new translation handle. This won't initiate a connection until you try to translate something. =over =item src Source language, in RFC-3066 form. See L for a discussion of RFC-3066 language tags. =item dest Destination Language =item host Specify the host to contact =item port Specify the port number =back =cut use I18N::LangTags qw(is_language_tag); sub new { my ($class, %options) = (@_); my $self = bless { %config }, $class; croak "Must supply source and destination language" unless (defined $options{src} and defined $options{dest}); is_language_tag($self->{src} = delete $options{src}) or croak "$self->{src} is not a valid RFC3066 language tag"; is_language_tag($self->{dest} = delete $options{dest}) or croak "$self->{dest} is not a valid RFC3066 language tag"; $self->config(%options); $self->{pair} = $self->{src} . "_" . $self->{dest}; my $custom_port = $servers{$self->{pair}}; if ( defined $custom_port ) { ($self->{host}, $self->{port}) = ($custom_port =~ m/^(.*)(?: (:\d+) )$/); } $self->{port} ||= _default_port($self->{pair}); return $self; } =head1 METHODS The following methods may be called on Lingua::Translate::SysTran objects. =head2 translate($text) : $translated Translates the given text. die's on any kind of error. =cut use IO::Socket; BEGIN { # use Unicode::MapUTF8 if it is available eval "use Unicode::MapUTF8 qw(from_utf8 to_utf8);"; if ( $@ ) { eval 'no strict; sub from_utf8 { %a=(@_); $a{"-string"} } '. '*{to_utf8} = \&from_utf8'; } }; sub translate { my $self = shift; UNIVERSAL::isa($self, __PACKAGE__) or croak __PACKAGE__."::translate() called as function"; # every back-end we know of speaks ISO-8859-1 my $text = from_utf8( -string => (shift), -charset => "iso-8859-1" ); my $translated; my $request = ( "METHOD=SOCKET\n". "ACTION=TRANSLATE\n". "SOURCE-CONTENT=".length($text)."\n". "$text\n" ); my $socket = IO::Socket::INET->new ( Proto => 'tcp', PeerAddr => $self->{host}, PeerPort => $self->{port}, Reuse => 1, ); $self->_barf("Connection failed; $!") unless $socket; ## Sending request $socket->write($request, length($request)) || $self->_barf ('write failed; '.$!); $socket->flush; ## Then waiting for answer my ($error, $error_message, $time); while ($_ = $socket->getline()) { my ($command, $value) = (m/^([\w\-]+)=(.*)$/) or $self->_barf("protocol error"); if ( $command eq "ERR" ) { $error = $value; } elsif ( $command eq "TIME" ) { $time = $value; } elsif ( $command eq "EMSG" ) { $error_message = $value; } elsif ( $command eq "OUTPUT-CONTENT" ) { # data always follows my $bytes_read = $socket->read($translated, $value); ($bytes_read == $value) or $self->_barf("short read"); last; } else { $self->_barf("protocol mismatch; $command"); } } # close connection $socket->close; $self->_barf($error_message) if $error; # trim excess line feeds at end of string $translated =~ s/\n*$//; return to_utf8( -string => $translated, -charset => "iso-8859-1" ); } sub _barf { my $self = shift; my $message = shift; die ($message . " talking to $self->{host}:$self->{port} " .$self->{pair} ); } =head2 available() : @list Returns a list of available language pairs, in the form of "XX_YY", where XX is the source language and YY is the destination. If you want the english name of a language tag, call I18N::LangTags::List::name() on it. See L. If you call this function without configuring the package, it returns all of the languages that there are known back-ends for. =cut sub available { my $self = shift; UNIVERSAL::isa($self, __PACKAGE__) or croak __PACKAGE__."::available() called as function"; my @a = keys %one_letter_codes; # English; "the new universal language?" # mi spitu fo le bango pe le glico return ( keys %servers || grep /en/, ( map { my $a=$_; map{"${_}_$a"} my @a } @a ) ); } =head1 CONFIGURATION FUNCTIONS =head2 config(option => $value) This function sets defaults for use when constructing objects. =cut sub config { my $self; if ( UNIVERSAL::isa($_[0], __PACKAGE__) ) { $self = shift; } else { $self = \%config; } while ( my ($option, $value) = splice @_, 0, 2 ) { if ( $option eq "pairs" ) { # configure a pair while ( my ($pair, $server) = each %$value ) { $servers{$pair} = $server; } } elsif ( $option =~ m/^(host|port)$/) { # configure host/port $self->{$option} = $value; } else { croak "Unknown configuration option $option"; } } } =over =item host Defines the hostname to use if no hostname/port is defined for a language pair. The default value is "localhost". Do not specify a port number. =item servers The value to this configuration option must be a hash reference from a language pair (in XX_YY form) to a hostname, optionally followed by a colon and a port number. If this configuration option is defined, then attempts to translate undefined languages will fail. There is no default value for this option. =back =head1 A Note on default port numbers Returns the host name and port number for the given language pair. To determine the default port number, take the one-letter code for the language from the below table, express as a number in base 25 (A=0, B=1, etc) and then add 10000 decimal. Eg en => de would be EG, which is 106 decimal, or port 10106. =head2 ONE LETTER LANGUAGE CODES en => E de => G it => I fr => F pt => P es => S el => K =cut sub _default_port { my $pair = shift; my ($src, $tgt) = ($pair =~ m/^(..)_(..)/) or croak "$pair is not a valid language pair"; # FIXME - won't work on EBCDIC systems my $A = ord("A"); my $num = ( (ord($one_letter_codes{$src}) - $A) * 25 +ord($one_letter_codes{$tgt}) - $A ); return $num + 10000; } # extract configuration options from the POD use Pod::Constants 'NAME' => sub { ($VERSION) = (m/(\d+\.\d+)/); }, 'CONFIGURATION FUNCTIONS' => sub { Pod::Constants::add_hook ('*item' => sub { my ($varname) = m/(\w+)/; #my ($default) = m/The default value is\s+"(.*)"\./s; my ($default) = m/The default value is\s+"(.*)"/s; config($varname => $default) if $default; } ); Pod::Constants::add_hook ( '*back' => sub { # an ugly hack? $config{agent} .= $VERSION; Pod::Constants::delete_hook('*item'); Pod::Constants::delete_hook('*back'); } ); }, 'ONE LETTER LANGUAGE CODES' => \%one_letter_codes; =head1 BUGS/TODO No support for non-ISO-8859-1 character sets - with the software I have, there is no option. =head1 SEE ALSO L, L, L =head1 AUTHOR Sam Vilain, =cut 1;