=head1 NAME
Lingua::ZH::Romanize::Pinyin - Romanization of Standard Chinese language
=head1 SYNOPSIS
use Lingua::ZH::Romanize::Pinyin;
my $conv = Lingua::ZH::Romanize::Pinyin->new();
my $roman = $conv->char( $hanji );
printf( "<ruby><rb>%s</rb><rt>%s</rt></ruby>", $hanji, $roman );
my @array = $conv->string( $string );
foreach my $pair ( @array ) {
my( $raw, $ruby ) = @$pair;
if ( defined $ruby ) {
printf( "<ruby><rb>%s</rb><rt>%s</rt></ruby>", $raw, $ruby );
} else {
print $raw;
}
}
=head1 DESCRIPTION
Pinyin is a phonemic notation for Chinese characters.
=head2 $conv = Lingua::ZH::Romanize::Pinyin->new();
This constructor method returns a new object with its dictionary cached.
=head2 $roman = $conv->char( $hanji );
This method returns romanized letters of a Hanji character.
It returns undef when $hanji is not a valid Hanji character.
The argument's encoding must be UTF-8.
Both Simplified Chinese and Traditional Chinese are allowed.
=head2 $roman = $conv->chars( $string );
This method returns romanized letters of Hanji characters.
=head2 @array = $conv->string( $string );
This method returns an array of array references,
each of which is a pair of a Hanji character and its romanized letters.
$array[0] # first Chinese character's pair (array)
$array[1][0] # second Chinese character itself
$array[1][1] # its romanized letters
=head1 DICTIONARY
This module internally uses a mapping table from Hanji to roman
which is based on C<PY.tit> which is distributed with C<cxterm>.
=head1 MODULE DEPENDENCY
The L<Storable> module is required.
=head1 UTF-8 FLAG
This module treats the utf8 flag transparently.
=head1 SEE ALSO
L<Lingua::ZH::Romanize::Cantonese> for romanization of Cantonese
L<Lingua::JA::Romanize::Japanese> for romanization of Japanese
L<Lingua::KO::Romanize::Hangul> for romanization of Korean
http://www.kawa.net/works/perl/romanize/romanize-e.html
http://linuga-romanize.googlecode.com/svn/trunk/Lingua-ZH-Romanize-Pinyin/
=head1 COPYRIGHT
Copyright (c) 2003-2008 Yusuke Kawasaki. All rights reserved.
=head1 LICENSE
Any commercial use of the Software requires a license directly from
the author(s). Please contact the author(s) to negotiate an
appropriate license. Commercial use includes integration of all or
part of the binary or source code covered by this permission
notices into a product for sale or license to third parties on your
behalf, or distribution of the binary or source code to third
parties that need it to utilize a product sold or licensed on your
behalf.
=cut
package Lingua::ZH::Romanize::Pinyin;
use strict;
use Carp;
use Storable;
use vars qw( $VERSION );
$VERSION = "0.23";
# True when the running perl is 5.8.1 or later; char() and string() only
# apply utf8::is_utf8/encode/decode when this flag is set.
# Declared unconditionally: "my $x = ... if COND" is documented in perlsyn
# as having undefined behaviour, so the condition lives in the initializer.
my $PERL581 = ( $] >= 5.008001 ) ? 1 : undef;
# Constructor.
#   $package - class name to bless the dictionary into
#   $path    - optional path to a Storable dictionary file; when omitted (or
#              false) it is derived from the module location by
#              _detect_store()
# Croaks when the file is not readable or when Storable::retrieve yields a
# false value. The retrieved structure itself becomes the object.
sub new {
    my ( $package, $path ) = @_;
    $path ||= _detect_store($package);

    if ( !-r $path ) {
        Carp::croak "$! - $path\n";
    }

    my $dict = Storable::retrieve($path);
    Carp::croak "$! - $path\n" unless $dict;

    return bless $dict, $package;
}
# Romanize a single Hanji character.
# The dictionary lookup is done on the UTF-8 encoded form: when the argument
# carries the utf8 flag it is encoded before the lookup and the result is
# decoded again afterwards, so the caller gets back the same kind of string
# that was passed in. When $PERL581 is not set the utf8:: calls are skipped
# and the arguments go straight to _char().
sub char {
    my ( $self, @rest ) = @_;
    return $self->_char(@rest) if !$PERL581;

    my $hanji   = $rest[0];
    my $decoded = utf8::is_utf8($hanji);
    utf8::encode($hanji) if $decoded;

    my $roman = $self->_char($hanji);
    utf8::decode($roman) if $decoded;

    return $roman;
}
# Raw dictionary lookup: the object hash maps a character key straight to its
# romanization. Returns the stored value, or an empty list (undef in scalar
# context) when the key is not present.
sub _char {
    my ( $self, $key ) = @_;
    if ( exists $self->{$key} ) {
        return $self->{$key};
    }
    return;
}
# Romanize a whole string and return it as one space-separated scalar.
# Each piece produced by string() contributes its romanization when the pair
# has a second element, otherwise the raw piece itself.
sub chars {
    my ( $self, $text ) = @_;
    my @words;
    for my $pair ( $self->string($text) ) {
        push @words, ( @$pair > 1 ? $pair->[1] : $pair->[0] );
    }
    return join( " ", @words );
}
# Split a string into [ raw, romanization ] pairs (see _string()).
# The work is done on the UTF-8 encoded form: an argument carrying the utf8
# flag is encoded first, and both elements of every returned pair are decoded
# again so the result matches the kind of string that was passed in. When
# $PERL581 is not set the utf8:: calls are skipped entirely.
sub string {
    my ( $self, @rest ) = @_;
    return $self->_string(@rest) if !$PERL581;

    my $text    = $rest[0];
    my $decoded = utf8::is_utf8($text);
    utf8::encode($text) if $decoded;

    my @pairs = $self->_string($text);
    return @pairs unless $decoded;

    for my $pair (@pairs) {
        for my $idx ( 0, 1 ) {
            utf8::decode( $pair->[$idx] ) if defined $pair->[$idx];
        }
    }
    return @pairs;
}
# Tokenize a byte string and look each multi-byte token up in the dictionary.
# The pattern has two alternatives:
#   - one byte in 0xC0-0xFF followed by one or more bytes in 0x80-0xBF, taken
#     as a single (Chinese) character;
#   - a run of bytes in 0x00-0x7F, taken as one ASCII chunk.
# Returns a list of array refs: [ char, roman ] when the character is a key
# of the object hash, [ char ] when it is not, and [ ascii_run ] for ASCII.
sub _string {
    my ( $self, $bytes ) = @_;
    my @pairs;
    while ( $bytes =~ /([\300-\377][\200-\277]+)|([\000-\177]+)/sg ) {
        my ( $hanji, $ascii ) = ( $1, $2 );
        if ( !defined $hanji ) {
            push @pairs, [$ascii];    # ASCII
            next;
        }
        # Chinese: attach the romanization only when the dictionary has one
        push @pairs,
            exists $self->{$hanji}
            ? [ $hanji, $self->{$hanji} ]
            : [$hanji];
    }
    return @pairs;
}
# Work out where the dictionary file lives: it sits next to the module file
# recorded in %INC, with ".pm" replaced by ".store", e.g.
#   Pinyin.pm    -> Pinyin.store
#   Cantonese.pm -> Cantonese.store
# Croaks when the %INC entry does not end in ".pm" (which includes the case
# where the package has no %INC entry at all).
sub _detect_store {
    my ($package) = @_;
    my @parts   = split( "::", "$package.pm" );
    my $inc_key = join( "/", @parts );
    my $path    = $INC{$inc_key};
    my $changed = ( $path =~ s#\.pm$#.store# );
    Carp::croak "Invalid module name: $package\n" unless $changed;
    return $path;
}
1;