package Lingua::Klingon::Recode; # vim:set tw=72 sw=2 cin cink-=0#: use 5.005; use strict; use Carp; require Exporter; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); @ISA = qw(Exporter); # Items to export into callers namespace by default. Note: do not export # names by default without a very good reason. Use EXPORT_OK instead. # Do not simply export all your public functions/methods/constants. # This allows declaration use Lingua::Klingon::Recode ':all'; # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK # will save memory. %EXPORT_TAGS = ( 'all' => [ qw( recode ) ] ); @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); @EXPORT = qw( ); $VERSION = '1.0'; # convert from encoding FOO to uhmal gnj my %from = ( 'tlhIngan' => { 'a' => 'a', 'b' => 'b', 'ch' => 'c', 'D' => 'd', 'e' => 'e', 'gh' => 'f', 'H' => 'g', 'I' => 'h', 'j' => 'i', 'l' => 'j', 'm' => 'k', 'n' => 'l', 'ng' => 'm', 'o' => 'n', 'p' => 'o', 'q' => 'p', 'Q' => 'q', 'r' => 'r', 'S' => 's', 't' => 't', 'tlh' => 'u', 'u' => 'v', 'v' => 'w', 'w' => 'x', 'y' => 'y', "'" => 'z', }, 'tlhingan' => { 'a' => 'a', 'b' => 'b', 'ch' => 'c', 'd' => 'd', 'e' => 'e', 'gh' => 'f', 'h' => 'g', 'i' => 'h', 'j' => 'i', 'l' => 'j', 'm' => 'k', 'n' => 'l', 'ng' => 'm', 'o' => 'n', 'p' => 'o', 'q' => 'p', # 'q' => 'q', # this gets lost :( 'r' => 'r', 's' => 's', 't' => 't', 'tlh' => 'u', 'u' => 'v', 'v' => 'w', 'w' => 'x', 'y' => 'y', "'" => 'z', }, 'XIFAN' => { 'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e', 'G' => 'f', 'H' => 'g', 'I' => 'h', 'J' => 'i', 'L' => 'j', 'M' => 'k', 'N' => 'l', 'F' => 'm', 'O' => 'n', 'P' => 'o', 'K' => 'p', 'Q' => 'q', 'R' => 'r', 'S' => 's', 'T' => 't', 'X' => 'u', 'U' => 'v', 'V' => 'w', 'W' => 'x', 'Y' => 'y', 'Z' => 'z', "'" => 'z', }, ); # the same mapping serves both $from{'XIFANZ'} = $from{'XIFAN'}; # convert from uhmal gnj to encoding FOO my %to; $to{'tlhIngan'} = { reverse %{$from{'tlhIngan'}} }; $to{'tlhingan'} = { reverse %{$from{'tlhingan'}} }; $to{'tlhingan'}{'q'} = 'q'; # map both 'Q' and 'q' to 'q' $to{'XIFAN'} = { reverse %{$from{'XIFAN'}} }; $to{'XIFAN'}{'z'} = "'"; $to{'XIFANZ'} = { reverse %{$from{'XIFAN'}} }; $to{'XIFANZ'}{'z'} = 'Z'; # map names to internal form my %internal = ( 'tlhIngan' => 'tlhIngan', 'tlhIngan Hol' => 'tlhIngan', "tlhIngan Hol Dajalh'a'" => 'tlhIngan', 'tlhingan' => 'tlhingan', 'tlhingan hol' => 'tlhingan', "tlhingan hol dajatlh'a'" => 'tlhingan', 'TLHINGAN' => 'TLHINGAN', 'TLHINGAN HOL' => 'TLHINGAN', "TLHINGAN HOL DAJATLH'A'" => 'TLHINGAN', 'xifan' => 'xifan', 'xifan hol' => 'xifan', "xifan hol dajax'a'" => 'xifan', 'xifan hol dajaxzaz' => 'xifanz', 'xifanz' => 'xifanz', 'XIFAN' => 'XIFAN', 'XIFAN HOL' => 'XIFAN', "XIFAN HOL DAJAX'A'" => 'XIFAN', 'XIFAN HOL DAJAXZAZ' => 'XIFANZ', 'XIFANZ' => 'XIFANZ', 'uhmal' => 'uhmal', 'uhmal gnj' => 'uhmal', 'uhmal gnj daiauzaz' => 'uhmal', 'UHMAL' => 'UHMAL', 'UHMAL GNJ' => 'UHMAL', 'UHMAL GNJ DAIAUZAZ' => 'UHMAL', ); # regular expression that defines a letter my %letter = ( 'uhmal' => qr/[a-z]/, 'tlhIngan' => qr/tlh|[cg]h|ng|[abDeHIjlmnopqQrStuvwy']/, 'tlhingan' => qr/tlh|[cg]h|ng|[abdehijlmnopqrstuvwy']/, 'XIFAN' => qr/[A-Y']/, 'XIFANZ' => qr/[A-Z]/, ); # based on my %base = ( 'uhmal' => 'uhmal', 'UHMAL' => 'uhmal', 'tlhIngan' => 'tlhIngan', 'tlhingan' => 'tlhingan', 'TLHINGAN' => 'tlhingan', 'xifan' => 'XIFAN', 'xifanz' => 'XIFANZ', 'XIFAN' => 'XIFAN', 'XIFANZ' => 'XIFANZ', ); # The subroutine itself sub recode { my $from = shift; my $to = shift; my @input; my @result; if (not exists $internal{$from}) { croak "Can't recognise 'from' encoding '$from'"; } elsif (not exists $internal{$to}) { croak "Can't recognise 'to' encoding '$to'"; } else { $from = $internal{$from}; $to = $internal{$to}; if($to eq $from) { return +(wantarray ? @_ : $_[0]); } elsif($to eq lc $from) { return +(wantarray ? map lc, @_ : lc $_[0]); } elsif($to eq uc $from) { return +(wantarray ? map uc, @_ : uc $_[0]); } else { # transform from initial encoding to uhmal if($from eq 'UHMAL') { @input = map lc, @_; } elsif($from eq 'uhmal') { @input = @_; } else { @input = @_; for(@input) { $_ = lc $_ if $base{$from} eq lc $from; $_ = uc $_ if $base{$from} eq uc $from; s/($letter{$base{$from}})/$from{$base{$from}}{$1}/g; } } # transform from uhmal to final encoding if($to eq 'UHMAL') { @result = map uc, @input; } elsif($to eq 'uhmal') { @result = @input; } else { @result = @input; for(@result) { s/($letter{'uhmal'})/$to{$base{$to}}{$1}/g; $_ = lc $_ if $to eq lc $base{$to}; $_ = uc $_ if $to eq uc $base{$to}; } } } } return +(wantarray ? @result : $result[0]); } 1; __END__ =head1 NAME Lingua::Klingon::Recode - Convert Klingon words between different encodings =head1 VERSION This document refers to version 1.00 of Lingua::Klingon::Recode, released on 2003-10-19. =head1 SYNOPSIS use Lingua::Klingon::Recode; @copy = Lingua::Klingon::Recode::recode('XIFAN HOL', # from 'tlhIngan Hol', # to @original); or use Lingua::Klingon::Collate ':all'; @copy = recode 'XIFAN HOL', 'tlhIngan Hol', @original; or # demonstrate scalar version for a change use Lingua::Klingon::Collate qw( recode ); $copy = recode 'XIFAN HOL', 'tlhIngan Hol', $original; =head1 DESCRIPTION =head2 Overview Lingua::Klingon::Recode is a module which allows you to convert Klingon text from one encoding to another. For example, one frequently-used encoding is the so-called 'XIFAN HOL' encoding (after the encoded version of what is 'tlhIngan Hol' in the standard encoding). All suported encodings can represent the same sounds, but they use different characters or sequences of characters to do so. Most encodings have three allowable names (or aliasses): the transliteration of 'tlhIngan Hol', an abbreviated version with only 'tlhIngan', and an extended version "tlhIngan Hol Dajatlh'a'" (this extended version is mainly used to distinguish between related encodings that differ in how they represent the apostrophe). The support encodings, with their recognised aliasses, are: =over 4 =item tlhIngan Hol, tlhIngan, tlhIngan Hol Dajatlh'a' The standard encoding for Klingon; this is the usual transliteration. =item tlhingan hol, tlhingan, tlhingan hol dajatlh'a' This is the same as the standard encoding, except that all letters are lowercase. This has the grave disadvantage that 'q' and 'Q' are conflated. =item TLHINGAN HOL, TLHINGAN, TLHINGAN HOL DAJATLH'a' This is the same as the standard encoding, except that all letters are uppercase. Again, 'q' and 'Q' cannot be distinguished in this encoding. =item XIFAN HOL, XIFAN, XIFAN HOL DAJAX'A' This is the so-called "XIFAN HOL" encoding, used, for example, as the encoding of some fonts. =item XIFAN HOL DAJAXZAZ, XIFANZ This is the same encoding as the one above, but it uses Z for what is written ' (apostrophe) in the standard encoding, rather than the apostrophe. This encoding uses only the 26 letters of the uppercase Roman alphabet. This encoding must be specified in its full form, or in the abbreviation 'XIFANZ'. =item xifan hol, xifan, xifan hol dajax'a' This is the "XIFAN HOL" encoding with all letters replaced by their lowercase form. =item xifan hol dajaxzaz, xifanz This is the "XIFAN HOL" encoding with all letters replaced by their lowercase form, and with 'z' rather than "'" for the apostrophe. This encoding uses only the 26 letters of the lowercase Roman alphabet. =item uhmal gnj, uhmal, uhmal daiauzaz This is an encoding formed by representing each Klingon letter in the standard transliteration with a lower-case Roman letter in alphabetical order according to the standard transliteration. It is also the encoding currently used by L. It can be represented as followed: tlhIngan Hol a b ch D e gh H I j l m n ng uhmal gnj a b c d e f g h i j k l m tlhIngan Hol o p q Q r S t tlh u v w y ' uhmal gnj n o p q r s t u v w x y z =item UHMAL GNJ, UHMAL, UHMAL GNJ DAIAUZAZ This is the uppercase version of the 'uhmal gnj' encoding. =back =head2 Exports Lingua::Klingon::Recode exports no functions by default, in order to avoid namespace pollution. However, all functions listed here can be imported explicitly by naming them, or they can be imported all together by using the tag ':all'. =head2 recode This is currently the only subroutine in this module. It takes at least three parameters: =over 4 =item * "from" encoding =item * "to" encoding =item * one or more strings =back The strings are converted from the "from" encoding to the "to" encoding. In list context, returns all the converted strings. In scalar context, returns the first converted string. =head1 BUGS None currently known. Please report any bugs found through http://rt.cpan.org/ or by emailing the author. =head1 SEE ALSO L =head1 FEEDBACK If you use this module, I'd appreciate it if you drop me a line at the email address in L, just so that I have an idea of how many people use this module at all. Also, if you have any comments, feel free to email me. =head1 AUTHOR Philip Newton, Epne@cpan.orgE =head1 COPYRIGHT AND LICENSE Copyright (C) 2003 by Philip Newton. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: =over 4 =item * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. =item * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. =item * Neither the name of Philip Newton nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. =back THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =cut