package Lingua::UK::Translit;

use 5.006;
use strict;
use warnings;
use utf8;

require Exporter;

our @ISA = qw(Exporter);

our %EXPORT_TAGS = ( 'all' => [ qw( ) ] );
our @EXPORT_OK   = ( @{ $EXPORT_TAGS{'all'} } );
our @EXPORT      = qw( &uk2ascii );

our $VERSION = '0.10';

# Default Latin rendering of every Ukrainian letter (lower and upper case).
# Used for any letter that is not covered by a more specific rule below.
my %ua2en = (
    'а' => 'a',   'А' => 'A',
    'б' => 'b',   'Б' => 'B',
    'в' => 'v',   'В' => 'V',
    'г' => 'h',   'Г' => 'H',
    'ґ' => 'g',   'Ґ' => 'G',
    'д' => 'd',   'Д' => 'D',
    'е' => 'e',   'Е' => 'E',
    'є' => 'ie',  'Є' => 'Ie',
    'ж' => 'zh',  'Ж' => 'Zh',
    'з' => 'z',   'З' => 'Z',
    'и' => 'y',   'И' => 'Y',
    'і' => 'i',   'І' => 'I',
    'ї' => 'i',   'Ї' => 'I',
    'й' => 'i',   'Й' => 'I',
    'к' => 'k',   'К' => 'K',
    'л' => 'l',   'Л' => 'L',
    'м' => 'm',   'М' => 'M',
    'н' => 'n',   'Н' => 'N',
    'о' => 'o',   'О' => 'O',
    'п' => 'p',   'П' => 'P',
    'р' => 'r',   'Р' => 'R',
    'с' => 's',   'С' => 'S',
    'т' => 't',   'Т' => 'T',
    'у' => 'u',   'У' => 'U',
    'ф' => 'f',   'Ф' => 'F',
    'х' => 'kh',  'Х' => 'Kh',
    'ц' => 'ts',  'Ц' => 'Ts',
    'ч' => 'ch',  'Ч' => 'Ch',
    'ш' => 'sh',  'Ш' => 'Sh',
    'щ' => 'sch', 'Щ' => 'Sch',
    'ь' => '\'',  'Ь' => '\'',
    'ю' => 'iu',  'Ю' => 'Iu',
    'я' => 'ia',  'Я' => 'Ia'
);

# Overrides applied only to the first letter of a word ("wb" = word
# beginning): these letters get a "y"-based rendering at the start of a word.
my %ua2enwb = (
    'є' => 'ye', 'Є' => 'Ye',
    'ї' => 'y',  'Ї' => 'Y',
    'й' => 'y',  'Й' => 'Y',
    'ю' => 'yu', 'Ю' => 'Yu',
    'я' => 'ya', 'Я' => 'Ya'
);

# uk2ascii($text)
#
# Transliterates the Ukrainian letters in $text to Latin letters and returns
# the resulting string. Characters that are not in the mapping tables are
# copied through unchanged.
#
# Rules, in order of precedence for each character:
#   1. first character of a word and listed in %ua2enwb -> word-start form;
#   2. 'г'/'Г' directly preceded by 'з'/'З' in the same word -> 'gh'/'Gh'
#      (the plain mapping would give "zh", which is also the rendering of 'ж');
#   3. otherwise the default form from %ua2en.
sub uk2ascii {
    my $strin = shift;

    # Splitting on \b keeps the separators (spaces, punctuation) as chunks of
    # their own, so concatenating the processed chunks preserves formatting.
    my @words  = split /\b/, $strin;
    my $strans = '';

    foreach my $word (@words) {
        my @c      = split //, $word;
        my $wtrans = '';

        for my $i ( 0 .. $#c ) {
            my $char = $c[$i];

            if ( $i == 0 and exists $ua2enwb{$char} ) {
                $wtrans .= $ua2enwb{$char};
            }
            elsif ( exists $ua2en{$char} ) {

                # Only look at the previous letter when there is one: at
                # $i == 0 the index $i - 1 is -1, which Perl resolves to the
                # LAST character of the word and would wrongly trigger the
                # "gh" rule for words such as "газ".
                my $follows_z = $i > 0
                    && ( $c[ $i - 1 ] eq 'з' || $c[ $i - 1 ] eq 'З' );

                if ( $follows_z and $char eq 'г' ) {
                    $wtrans .= 'gh';
                }
                elsif ( $follows_z and $char eq 'Г' ) {
                    $wtrans .= 'Gh';
                }
                else {
                    $wtrans .= $ua2en{$char};
                }
            }
            else {
                # Not a Ukrainian letter: leave untouched.
                $wtrans .= $char;
            }
        }

        $strans .= $wtrans;
    }

    return $strans;
}

1;

__END__

=head1 NAME

Lingua::UK::Translit - Perl extension for correct transliteration of Ukrainian text in UTF-8 encoding to Latin symbols.

=head1 SYNOPSIS

  use utf8;
  use Lingua::UK::Translit;

  my $ukrainian_text="Україна";
  print uk2ascii( $ukrainian_text ), "\n";

=head1 DESCRIPTION

Lingua::UK::Translit is a collection of functions for proper
transliteration of Ukrainian text in UTF-8 encoding to Latin symbols.

Consists of functions for proper text transliteration.
Works only with UTF-8 encoding.
Returns all symbols in UTF-8 encoding.

=head2 Functions

=over 4

=item * uk2ascii($ukrainian_text)

where $ukrainian_text is text in UTF-8 encoding.
Returns transliterated text in Latin symbols, but encoded as UTF-8.
Transliterates only letters of the Ukrainian alphabet and leaves other symbols untouched.
Preserves formatting and punctuation.

=back

=head2 EXPORT

sub uk2ascii()

=head1 SEE ALSO

perl(1) - Practical Extraction and Report Language

Lingua::UK::Jcuken - Conversion between QWERTY and JCUKEN keys in Ukrainian

Lingua::RU::Jcuken - Conversion between QWERTY and JCUKEN keys in Russian

Lingua::RU::PhTranslit - Writing Cyrillic (Russian) symbols by ASCII symbols (0x20-0x7f)

Lingua::RU::Translit - Converts from Russian "translit" encoding to Russian in koi8-r

=head1 AUTHOR

O. Y. Panchuk, E<lt>olex@ucu.edu.uaE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2006 by O. Y. Panchuk

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.8 or,
at your option, any later version of Perl 5 you may have available.

=cut