# Package Convert::Cyrillic
# Version 1.02
# Part of "Cyrillic Software Suite"
# Get docs and newest version from
#     http://www.neystadt.org/cyrillic/
#
# Copyright (c) 1997-98, John Neystadt
# You may install this script on your web site for free
# To obtain permission for redistribution or any other usage
# contact john@neystadt.org.
#
# Drop me a line if you deploy this script on your site.

package Convert::Cyrillic;

use strict;
use warnings;

use Carp qw(croak);

our $VERSION = "1.02";

=head1 NAME

Convert::Cyrillic v1.02 - Routines for converting from one cyrillic charset to another.

=cut

# NOTE: this file is deliberately pure ASCII.  Every 8-bit character is
# written as a numeric byte value so that editors, version control and
# re-encoding tools cannot corrupt the conversion tables.
#
# Unicode::Map8 and Unicode::String are only needed for UTF-8 conversion and
# are loaded on first use (see _cp1251_map), so plain 8-bit conversions work
# without them being installed.

# Number of letters in the Russian alphabet, including YO.
my $ALPHABET_SIZE = 33;

# Build a byte string from a list of byte values.
sub _bytes {
    return join '', map { chr } @_;
}

# Conversion tables.  Every table lists the 33 upper-case letters followed by
# the 33 lower-case letters in alphabetical order (A, BE, VE, GHE, DE, IE,
# YO, ZHE, ... YA), so the letter at position N in one table corresponds to
# the letter at position N in every other table.
our %tab;

$tab{"KOI8"} = _bytes(
    # upper case
    0xE1, 0xE2, 0xF7, 0xE7, 0xE4, 0xE5, 0xB3, 0xF6, 0xFA, 0xE9, 0xEA,
    0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF2, 0xF3, 0xF4, 0xF5, 0xE6,
    0xE8, 0xE3, 0xFE, 0xFB, 0xFD, 0xFF, 0xF9, 0xF8, 0xFC, 0xE0, 0xF1,
    # lower case
    0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xA3, 0xD6, 0xDA, 0xC9, 0xCA,
    0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0, 0xD2, 0xD3, 0xD4, 0xD5, 0xC6,
    0xC8, 0xC3, 0xDE, 0xDB, 0xDD, 0xDF, 0xD9, 0xD8, 0xDC, 0xC0, 0xD1,
);

$tab{"DOS"} = _bytes(
    0x80 .. 0x85, 0xF0, 0x86 .. 0x9F,                  # upper case
    0xA0 .. 0xA5, 0xF1, 0xA6 .. 0xAF, 0xE0 .. 0xEF,    # lower case
);

$tab{"ISO"} = _bytes(
    0xB0 .. 0xB5, 0xA1, 0xB6 .. 0xCF,                  # upper case
    0xD0 .. 0xD5, 0xF1, 0xD6 .. 0xEF,                  # lower case
);

$tab{"WIN"} = _bytes(
    0xC0 .. 0xC5, 0xA8, 0xC6 .. 0xDF,                  # upper case
    0xE0 .. 0xE5, 0xB8, 0xE6 .. 0xFF,                  # lower case
);

# Volapuk transliteration.  Letters that transliterate to a single ASCII
# character are listed directly.  Letters that need two characters (YO, ZH,
# CH, SH, E', YU, YA) are represented here by a one-byte placeholder (their
# WIN-1251 code) which cstocs() expands afterwards using %VOL_DIGRAPH.
$tab{"VOL"}
    = "ABVGDE" . "\xA8\xC6" . "ZIJKLMNOPRSTUFXC" . "\xD7\xD8" . "W~Y'"
    . "\xDD\xDE\xDF"
    . "abvgde" . "\xB8\xE6" . "zijklmnoprstufxc" . "\xF7\xF8" . "w~y'"
    . "\xFD\xFE\xFF";

$tab{"MAC"} = _bytes(
    0x80 .. 0x85, 0xDD, 0x86 .. 0x9F,                  # upper case
    0xE0 .. 0xE5, 0xDE, 0xE6 .. 0xFE, 0xDF,            # lower case
);

# Upper/lower case halves of each 8-bit table, used by toLower/toUpper.
# 'KOI' is kept as an alias of 'KOI8' for backward compatibility.
our ( %UCase, %LCase );
for my $code (qw(KOI8 WIN DOS ISO MAC)) {
    $UCase{$code} = substr $tab{$code}, 0, $ALPHABET_SIZE;
    $LCase{$code} = substr $tab{$code}, $ALPHABET_SIZE;
}
$UCase{'KOI'} = $UCase{'KOI8'};
$LCase{'KOI'} = $LCase{'KOI8'};

# Expansion of the one-byte placeholders used in $tab{"VOL"}.
my %VOL_DIGRAPH = (
    "\xA8" => 'YO', "\xC6" => 'ZH', "\xD7" => 'CH', "\xD8" => 'SH',
    "\xDD" => "E'", "\xDE" => 'YU', "\xDF" => 'YA',
    "\xB8" => 'yo', "\xE6" => 'zh', "\xF7" => 'ch', "\xF8" => 'sh',
    "\xFD" => "e'", "\xFE" => 'yu', "\xFF" => 'ya',
);

# Regex matching any single placeholder byte, built from the map above so the
# two can never get out of sync.
my $VOL_PLACEHOLDER = do {
    my $class = join '', map { sprintf '\\x%02X', ord } sort keys %VOL_DIGRAPH;
    qr/([$class])/;
};

# Compiled tr/// closures, keyed by "source table\0destination table".
my %translator_for;

# Lazily created cp1251 <-> Unicode mapping object.
my $cp1251_map;

# Normalise a charset name: upper-case it and fold the aliases 'KOI' and
# 'UTF-8' to the canonical 'KOI8' and 'UTF8'.  Undefined names become ''.
sub _canonical_name {
    my ($name) = @_;
    return '' unless defined $name;
    $name = uc $name;
    return 'KOI8' if $name eq 'KOI';
    return 'UTF8' if $name eq 'UTF-8';
    return $name;
}

# Render a table as \xHH escapes so it can be embedded safely in tr///
# source code regardless of which bytes it contains.
sub _tr_spec {
    my ($table) = @_;
    return join '', map { sprintf '\\x%02X', ord } split //, $table;
}

# Return a copy of $text with each character of $from replaced by the
# character at the same position in $to.
sub _transliterate {
    my ( $text, $from, $to ) = @_;
    return $text if !defined $text || $from eq $to;

    # tr/// tables are fixed at compile time, so a string eval is the only
    # way to use dynamic tables (see perlop).  Only module-internal tables
    # ever reach this point, and each pair is compiled just once.
    my $translator = $translator_for{"$from\0$to"} ||= do {
        my $code = sprintf 'sub { $_[0] =~ tr/%s/%s/ }',
            _tr_spec($from), _tr_spec($to);
        my $compiled = eval $code;    ## no critic (ProhibitStringyEval)
        croak "Convert::Cyrillic: cannot build translation table: $@"
            unless $compiled;
        $compiled;
    };

    $translator->($text);    # modifies $text in place via @_ aliasing
    return $text;
}

# Return the shared Unicode::Map8 object for cp1251, loading the Unicode
# modules on first use.
sub _cp1251_map {
    unless ($cp1251_map) {
        require Unicode::Map8;
        require Unicode::String;
        $cp1251_map = Unicode::Map8->new("cp1251")
            or croak "Convert::Cyrillic: Unicode::Map8 has no cp1251 mapping";
    }
    return $cp1251_map;
}

# cstocs($Src, $Dst, $Buf)
#
# Convert $Buf from charset $Src to charset $Dst and return the result.
# Charset names are case-insensitive: KOI8 (or KOI), WIN, DOS, MAC, ISO,
# VOL and UTF8 (or UTF-8).  Characters that are not Cyrillic letters,
# including line breaks, pass through unchanged.  If either name is unknown
# no transliteration is performed.  An undefined $Buf is returned as is.
sub cstocs {
    my ( $Src, $Dst, $Buf ) = @_;

    return $Buf unless defined $Buf;

    $Src = _canonical_name($Src);
    $Dst = _canonical_name($Dst);

    # UTF-8 is always routed through WIN-1251.
    if ( $Src eq 'UTF8' ) {
        $Buf = _cp1251_map()->to8( Unicode::String::utf8($Buf)->ucs2 );
        $Src = 'WIN';
    }

    my $target = $Dst eq 'UTF8' ? 'WIN' : $Dst;
    if ( exists $tab{$Src} and exists $tab{$target} ) {
        $Buf = _transliterate( $Buf, $tab{$Src}, $tab{$target} );
    }

    if ( $Dst eq 'UTF8' ) {
        $Buf = _cp1251_map()->tou($Buf)->utf8;
    }
    elsif ( $Dst eq 'VOL' ) {
        # Expand the placeholders left by $tab{"VOL"} into digraphs.
        $Buf =~ s/$VOL_PLACEHOLDER/$VOL_DIGRAPH{$1}/g;
    }

    return $Buf;
}

# toLower($s, $Code)
#
# Return $s with the Cyrillic letters of charset $Code (KOI, KOI8, WIN, DOS,
# ISO or MAC; case-insensitive) converted to lower case.  Other characters,
# and strings in any other charset, are returned unchanged.
sub toLower {
    my ( $s, $Code ) = @_;

    $Code = defined $Code ? uc $Code : '';
    if ( exists $UCase{$Code} and exists $LCase{$Code} ) {
        $s = _transliterate( $s, $UCase{$Code}, $LCase{$Code} );
    }
    return $s;
}

# toUpper($s, $Code)
#
# Return $s with the Cyrillic letters of charset $Code (KOI, KOI8, WIN, DOS,
# ISO or MAC; case-insensitive) converted to upper case.  Other characters,
# and strings in any other charset, are returned unchanged.
sub toUpper {
    my ( $s, $Code ) = @_;

    $Code = defined $Code ? uc $Code : '';
    if ( exists $UCase{$Code} and exists $LCase{$Code} ) {
        $s = _transliterate( $s, $LCase{$Code}, $UCase{$Code} );
    }
    return $s;
}

1;

__END__

=head1 SYNOPSIS

  use Convert::Cyrillic;

  $Src = 'koi8';
  $Dst = 'win';
  $SrcBuf = 'text in koi8 here';
  $DstBuf = Convert::Cyrillic::cstocs ($Src, $Dst, $SrcBuf);

  $Lower = Convert::Cyrillic::toLower ($DstBuf, 'win');
  $Upper = Convert::Cyrillic::toUpper ($DstBuf, 'win');

=head1 DESCRIPTION

This package implements routine for converting from one cyrillic charset
to another. It is intended to be used from cgi's which need built-in
support for translations.

For example, you may wish to use it in form processor to translate from
user encoding to one used by your site.

Where B<$Src> and B<$Dst> are one of:

  KOI8  - for KOI8-R
  WIN   - for WIN-1251
  DOS   - for DOS, alternative, CP-866
  MAC   - for Macintosh
  ISO   - for ISO-8859-5
  UTF-8 - for UTF-8 (Unicode)
  VOL   - for Volapuk (transliteration)

Names are case-insensitive; C<KOI> is accepted for C<KOI8> and C<UTF8> for
C<UTF-8>.

Buffer may contain line breaks, which are preserved.

B<toLower ($s, $Code)> and B<toUpper ($s, $Code)> change the case of the
cyrillic letters in B<$s>, where B<$Code> is one of KOI8, WIN, DOS, ISO or
MAC.

UTF-8 conversion requires the Unicode::Map8 and Unicode::String modules,
which are loaded the first time they are needed.

=head1 NOTES

Part of "WWW Cyrillic Encoding Suite"
Get docs and newest version from
http://www.neystadt.org/cyrillic/

Copyright (c) 1997-98, John Neystadt
You may install this script on your web site for free.
To obtain permission for redistribution or any other usage
contact john@neystadt.org.

Drop me a line if you deploy this script on your site.

=head1 AUTHOR

John Neystadt

=head1 SEE ALSO

perl(1), Lingua::DetectCharset(3).

=cut