package Convert::Transcribe; use 5.005; use strict; use Carp; require Exporter; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); @ISA = qw(Exporter); %EXPORT_TAGS = (); @EXPORT_OK = (); @EXPORT = (); $VERSION = '0.02'; sub new { my $class = shift; my $self = {}; bless($self, $class); if (defined($_[0])) { if ($_[0] =~ /[\n\r]/) { fromstring($self, $_[0]); } else { fromfile($self, $_[0]); } } return $self; } sub fromfile { my $self = shift; my ($filename) = @_; open TLT, "<$filename" or croak "Cannot open $filename"; while () { chomp; push @{$self->{DATA}}, $_; } close TLT; _genfunc($self); } sub fromstring { my $self = shift; my ($string) = @_; chomp($string); @{$self->{DATA}} = split(/[\r\n]+/, $string); _genfunc($self); } sub _genfunc { my $self = shift; my (@r, $t, $neg, $ch, $sub, @cond, $i); $sub = "my \$tr = '';\nwhile (length) {\n"; @cond = (); foreach (@{$self->{DATA}}) { ($t, $neg) = ('', 0); s/\#.*//; s/([\'\"\/])/\\$1/g; while (/[\<\>]/) { s/([\<\>][^\<\>]+)// and push @cond, [split(' ', $1)]; } @r = split; if (@r == 2) { foreach $i (@cond) { $t = shift @$i; if ($i->[0] eq '!') { $ch = '!'; shift @$i; } else { $ch = '='; } my $reg = '('; my $del = ''; foreach (@$i) { $reg .= $del; if ($_ eq '$') { $reg .= ($t eq '>'? '\s|$': '^|\s'); } else { $reg .= $_; } $del = '|'; } $reg .= ')'; if ($t eq '<') { $sub .= " \$tr $ch~ /$reg\$/ and "; } elsif ($t eq '>') { $sub .= " /^$r[0](?$ch$reg)/ and "; } else { carp "Strange condition: " . join(' ', @$i), "\n"; } } $sub .= " s/^$r[0]// and \$tr .= '$r[1]' and next;\n"; @cond = (); } } $sub .= " s/^(.|\\n)//; \$tr .= \$1;\n }\nreturn \$tr;\n"; $self->{SUB} = $sub; } sub transcribe { my $self = shift; local ($_) = shift; eval $self->{SUB}; } sub generated_code { my $self = shift; return $self->{SUB}; } 1; __END__ =head1 NAME Convert::Transcribe - Perl extension for transcribing natural languages =head1 SYNOPSIS use Convert::Transcribe; $t = new Convert::Transcribe(); $t->fromfile('filename'); # or $t = new Convert::Transcribe(); $t->fromstring("transcription def. containing newlines"); # or $t = new Convert::Transcribe('filename'); # or $t = new Convert::Transcribe("transcription def. containing newlines"); $t->transcribe("text"); $t->generated_code(); # for debugging =head1 DESCRIPTION Transcriptions are transformations of a text from one alphabet into another in a way which feels natural to humans. This module allows you to specify transcriptions in a notation which hopefully feels more natural than using Perl regexps. Transcription files look as follows: # a comment a b > a # 'a' -> 'b' if followed by 'a' a c > ! b # 'a' -> 'c' if not followed by 'b' a d < b # 'a' -> 'd' if text transcribed ends in 'b' a e < ! b # 'a' -> 'e' if text transcribed doesn't end in 'b' a f < $ > $ # 'a' -> 'f' if followed by a word boundary and the # text transcribed ends in a word boundary a g # 'a' -> 'g' otherwise Transcription files can be loaded from text strings or from files. The module converts your transcription file into some Perl code which is then eval'ed when you call transcribe(). You may inspect the code generated by calling generated_code(). =head2 EXPORT None. =head1 SEE ALSO For transliteration (i.e., one-to-one mapping) you might prefer Convert::Translit by Genji Schmeder (on CPAN). =head1 TODO There probably are a good number of bugs left. Please report! It would be nice to supply a good number of real-life transcription definitions with the module. Please contribute! =head1 AUTHOR Thomas M. Widmann, Etwid@cpan.orgE =head1 COPYRIGHT AND LICENSE Copyright 2002, 2003 by Thomas M. Widmann This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut