The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Convert::Transcribe;

use 5.005;
use strict;
use Carp;

require Exporter;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(Exporter);

%EXPORT_TAGS = ();

@EXPORT_OK = ();

@EXPORT = ();

$VERSION = '0.02';

sub new {
    my $class = shift;
    my $self = {};
    bless($self, $class);
    if (defined($_[0])) {
      if ($_[0] =~ /[\n\r]/) {
	fromstring($self, $_[0]);
      } else {
	fromfile($self, $_[0]);
      }
    }
    return $self;
}

sub fromfile {
  my $self = shift;
  my ($filename) = @_;
  open TLT, "<$filename" or croak "Cannot open $filename";
  while (<TLT>) {
    chomp;
    push @{$self->{DATA}}, $_;
  }
  close TLT;
  _genfunc($self);
}

sub fromstring {
  my $self = shift;
  my ($string) = @_;
  chomp($string);
  @{$self->{DATA}} = split(/[\r\n]+/, $string);
  _genfunc($self);
}


sub _genfunc {
  my $self = shift;
  my (@r, $t, $neg, $ch, $sub, @cond, $i);

  $sub = "my \$tr = '';\nwhile (length) {\n";
  @cond = ();

  foreach (@{$self->{DATA}}) {
    ($t, $neg) = ('', 0);
    s/\#.*//;
    s/([\'\"\/])/\\$1/g;
    while (/[\<\>]/) {
      s/([\<\>][^\<\>]+)// and push @cond, [split(' ', $1)];
    }
    @r = split;
    if (@r == 2) {

      foreach $i (@cond) {

	$t = shift @$i;

	if ($i->[0] eq '!') {
	  $ch = '!';
	  shift @$i;
	} else {
	  $ch = '=';
	}

	my $reg = '(';  my $del = '';
	foreach (@$i) {
	  $reg .= $del;

	  if ($_ eq '$') {
	    $reg .= ($t eq '>'? '\s|$': '^|\s');
	  } else {
	    $reg .= $_;
	  }
	  $del = '|';
	}
	$reg .= ')';

	if ($t eq '<') {
	  $sub .= " \$tr $ch~ /$reg\$/ and ";
	} elsif ($t eq '>') {
	  $sub .= " /^$r[0](?$ch$reg)/ and ";
	} else {
	  carp "Strange condition: " . join(' ', @$i), "\n";
	}
      }
      $sub .= " s/^$r[0]// and \$tr .= '$r[1]' and next;\n";
      @cond = ();
    }
  }
  $sub .= " s/^(.|\\n)//; \$tr .= \$1;\n  }\nreturn \$tr;\n";

  $self->{SUB} = $sub;
}

sub transcribe {
  my $self = shift;

  local ($_) = shift;
  eval $self->{SUB};
}

sub generated_code {
  my $self = shift;

  return $self->{SUB};
}

1;
__END__

=head1 NAME

Convert::Transcribe - Perl extension for transcribing natural languages

=head1 SYNOPSIS

  use Convert::Transcribe;

  $t = new Convert::Transcribe();
  $t->fromfile('filename');
  # or
  $t = new Convert::Transcribe();
  $t->fromstring("transcription def. containing newlines");
  # or
  $t = new Convert::Transcribe('filename');
  # or
  $t = new Convert::Transcribe("transcription def. containing newlines");

  $t->transcribe("text");

  $t->generated_code();   # for debugging

=head1 DESCRIPTION

Transcriptions are transformations of a text from one alphabet into
another in a way which feels natural to humans.

This module allows you to specify transcriptions in a notation which
hopefully feels more natural than using Perl regexps.

Transcription files look as follows:

  # a comment
  a  b  > a     # 'a' -> 'b' if followed by 'a'
  a  c  > ! b   # 'a' -> 'c' if not followed by 'b'
  a  d  < b     # 'a' -> 'd' if text transcribed ends in 'b'
  a  e  < ! b   # 'a' -> 'e' if text transcribed doesn't end in 'b'
  a  f  < $ > $ # 'a' -> 'f' if followed by a word boundary and the
                # text transcribed ends in a word boundary
  a  g          # 'a' -> 'g' otherwise

Transcription files can be loaded from text strings or from files.

The module converts your transcription file into some Perl code
which is then eval'ed when you call transcribe().  You may inspect
the code generated by calling generated_code().

=head2 EXPORT

None.

=head1 SEE ALSO

For transliteration (i.e., one-to-one mapping) you might prefer
Convert::Translit by Genji Schmeder (on CPAN).

=head1 TODO

There probably are a good number of bugs left.  Please report!

It would be nice to supply a good number of real-life transcription
definitions with the module.  Please contribute!

=head1 AUTHOR

Thomas M. Widmann, E<lt>twid@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright 2002, 2003 by Thomas M. Widmann

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut