The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -s

use Lingua::PT::PLNbase;
use Lingua::NATools::ConfigData;
use warnings;
use strict;

# Command-line switches.  The shebang line runs perl with -s, which sets the
# package variable of the same name for each leading "-name" or "-name=value"
# argument; that is why these are declared with "our" rather than "my".
#   $h      - print the usage message and exit
#   $tmx    - after aligning, also produce a TMX file
#   $single - pass -s to nat-sentalign (dropped when -tmx is also given)
#   $raw    - skip tokenization; requires both -d and -D
#   $d, $D  - values handed to nat-sentalign as -d / -D (used with -raw only)
our ($h);
our ($tmx,$single,$raw, $d, $D);

# Write the short command-line synopsis to STDOUT and terminate the
# program.  This sub never returns: it finishes with a bare exit.
sub usage {
    my @help_chunks = (
        "nat-sentence-align: simple interface for Vanilla aligner\n\n",
        "\tnat-sentence-align [-tmx] [-single] [-d=.EOS] [-D=.EOP] <f1> <f2>\n\n",
        "For more help, please run 'perldoc nat-sentence-align'\n",
    );

    for my $chunk (@help_chunks) {
        print $chunk;
    }

    exit;
}

# ----------------------------------------------------------------------
# Main program
#
#   1. Unless -raw is given, tokenize both input files into "<file>.tok",
#      writing one token per line, ".EOS" after every sentence and ".EOP"
#      after every paragraph.
#   2. Run nat-sentalign (from the NATools bindir) on the prepared files.
#   3. With -tmx, ask for the two language codes and run nat-pair2tmx,
#      storing its output in "<file1>-<file2>.tmx".
#
# External commands are started with argument lists (no shell), so file
# names containing spaces or shell metacharacters are passed verbatim.
# ----------------------------------------------------------------------
usage() if ($h);

my $BINPREFIX = Lingua::NATools::ConfigData->config('bindir');

my $file1 = shift or die "I need two files to align\n";
my $file2 = shift or die "I need two files to align\n";

# -tmx takes precedence over -single.
if ($tmx && $single) { undef $single }

# Arguments for nat-sentalign: optional -s first, then the delimiters,
# then the two files to align.
my @align_args;
push @align_args, '-s' if $single;

if ($raw) {
    # The inputs are used as they are, so the caller has to name the
    # delimiters they contain.
    die "Using 'raw', you must supply -d and -D" unless $d && $D;
    push @align_args, '-d', $d, '-D', $D, $file1, $file2;
}
else {
    # Read paragraph by paragraph (records end at a blank line).  The
    # change is local so the rest of the program keeps line-based input.
    local $/ = "\n\n";

    for my $file ($file1, $file2) {
        my $tok_file = "$file.tok";

        open my $in, '<', $file
            or die "Cannot open file '$file': $!\n";
        open my $out, '>', $tok_file
            or die "Cannot open file '$tok_file': $!\n";

        while (my $paragraph = <$in>) {
            # Skip records that hold nothing but whitespace; emitting a
            # lone .EOP for them would desynchronize the two files.
            next if $paragraph =~ /\A\s*\z/;

            # Put the whole paragraph on a single line.
            $paragraph =~ s/\n/ /g;
            $paragraph =~ s/ +/ /g;

            for my $sentence (sentences($paragraph)) {
                my @tokens = atomiza($sentence);
                print {$out} join("\n", @tokens), "\n.EOS\n";
            }
            print {$out} ".EOP\n";
        }

        # Buffered write errors only show up when the handle is closed.
        close $out or die "Cannot write file '$tok_file': $!\n";
        close $in;
    }

    push @align_args, '-d', '.EOS', '-D', '.EOP', "$file1.tok", "$file2.tok";
}

my $aligner = "$BINPREFIX/nat-sentalign";
if (system($aligner, @align_args) != 0) {
    die "Cannot run '$aligner': $!\n" if $? == -1;
    die "'$aligner' failed (wait status $?)\n";
}

if ($tmx) {
    # Ask for the language code of each input file, in order.
    my @langs;
    for my $file ($file1, $file2) {
        print STDOUT "Please enter language code for '$file': ";
        my $code = <STDIN>;
        die "No language code supplied for '$file'\n" unless defined $code;
        chomp $code;
        push @langs, $code;
    }

    my $tmx_file = "$file1-$file2.tmx";
    print STDOUT "Producing TMX '$tmx_file'...\n";

    # NOTE(review): the aligned files are looked up as "<file>.tok.al".
    # With -raw the aligner was given <file1>/<file2> rather than the
    # .tok files, so these names presumably do not exist in that mode --
    # confirm against nat-sentalign's output naming.
    my @tmx_cmd = ('nat-pair2tmx',
                   "$file1.tok.al", $langs[0],
                   "$file2.tok.al", $langs[1]);

    open my $tmx_in, '-|', @tmx_cmd
        or die "Cannot run nat-pair2tmx: $!\n";
    open my $tmx_out, '>', $tmx_file
        or die "Cannot open file '$tmx_file': $!\n";
    binmode $tmx_in;
    binmode $tmx_out;

    while (my $line = <$tmx_in>) {
        print {$tmx_out} $line;
    }

    close $tmx_out or die "Cannot write file '$tmx_file': $!\n";
    close $tmx_in  or die "nat-pair2tmx failed (wait status $?)\n";
}


=encoding UTF-8

=head1 NAME

nat-sentence-align - simple interface for Vanilla aligner.

=head1 SYNOPSIS

  nat-sentence-align [-tmx] [-single] [-d=.EOS] [-D=.EOP] <f1> <f2>

=head1 DESCRIPTION

This command is a frontend for the Vanilla aligner supplied with
NATools. To use it you must supply two parallel files with soft and
hard delimiters for synchronization.

By default these delimiters are '.EOS' and '.EOP'. You can change them
using the C<-d> and C<-D> switches; they are honoured only together
with the C<-raw> switch, which requires both of them.

At the end it creates a pair of sentence-aligned files. You can supply
a C<-tmx> option to force the creation of a TMX, or the C<-single>
option to force the creation of a single file with the two languages
(mainly used for debugging).

=head1 SEE ALSO

NATools Documentation, perl(1)

=head1 AUTHOR

Alberto Manuel Brandão Simões, E<lt>ambs@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2006-2009 by Alberto Manuel Brandão Simões

=cut