=head1 NAME Lingua::KO::Romanize::Hangul - Romanization of Korean language =head1 SYNOPSIS use Lingua::KO::Romanize::Hangul; my $conv = Lingua::KO::Romanize::Hangul->new(); my $roman = $conv->char( $hangul ); printf( "%s%s", $hangul, $roman ); my @array = $conv->string( $string ); foreach my $pair ( @array ) { my( $raw, $ruby ) = @$pair; if ( defined $ruby ) { printf( "%s%s", $raw, $ruby ); } else { print $raw; } } =head1 DESCRIPTION Hangul is phonemic characters of the Korean language. This module follows the C which was released on July 7, 2000 as the official romanization system in South Korea. =head2 $conv = Lingua::KO::Romanize::Hangul->new(); This constructer methods returns a new object. =head2 $roman = $conv->char( $hangul ); This method returns romanized letters of a Hangul character. It returns undef when $hanji is not a valid Hangul character. The argument's encoding must be UTF-8. =head2 $roman = $conv->chars( $string ); This method returns romanized letters of Hangul characters. =head2 @array = $conv->string( $string ); This method returns a array of referenced arrays which are pairs of a Hangul chacater and its romanized letters. $array[0] # first Korean character's pair (array) $array[1][0] # secound Korean character itself $array[1][1] # its romanized letters =head1 UTF-8 FLAG This module treats utf8 flag transparently. =head1 SEE ALSO L for Japanese L for Chinese http://www.korean.go.kr/06_new/rule/rule06.jsp http://www.kawa.net/works/perl/romanize/romanize-e.html =head1 COPYRIGHT AND LICENSE Copyright (c) 1998-2008 Yusuke Kawasaki. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut # ---------------------------------------------------------------- package Lingua::KO::Romanize::Hangul; use strict; use vars qw( $VERSION ); $VERSION = "0.20"; my $PERL581 = 1 if ( $] >= 5.008001 ); my $INITIAL_LETTER = [map {$_ eq '-' ? '' : $_} qw( g kk n d tt r m b pp s ss - j jj ch k t p h )]; my $PEAK_LETTER = [map {$_ eq '-' ? '' : $_} qw( a ae ya yae eo e yeo ye o wa wae oe yo u wo we wi yu eu ui i )]; my $FINAL_LETTER = [map {$_ eq '-' ? '' : $_} qw( - g kk ks n nj nh d r lg lm lb ls lt lp lh m b ps s ss ng j c k t p h )]; # my $FINAL_LETTER = [map {$_ eq '-' ? '' : $_} qw( # - g kk ks n nj nh d r rg rm rb rs rt # rp rh m b bs s ss ng j c k t p h # )]; # ---------------------------------------------------------------- sub new { my $package = shift; my $self = {@_}; bless $self, $package; $self; } sub char { my $self = shift; return $self->_char(@_) unless $PERL581; my $char = shift; my $utf8 = utf8::is_utf8( $char ); utf8::encode( $char ) if $utf8; $char = $self->_char( $char ); utf8::decode( $char ) if $utf8; $char; } sub _char { my $self = shift; my $char = shift; my( $c1, $c2, $c3, $c4 ) = unpack("C*",$char); return if ( ! defined $c3 || defined $c4 ); my $ucs2 = (($c1 & 0x0F)<<12) | (($c2 & 0x3F)<<6) | ($c3 & 0x3F); return if ( $ucs2 < 0xAC00 ); return if ( $ucs2 > 0xD7A3 ); my $han = $ucs2 - 0xAC00; my $init = int( $han / 21 / 28 ); my $peak = int( $han / 28 ) % 21; my $fin = $han % 28; join( "", $INITIAL_LETTER->[$init], $PEAK_LETTER->[$peak], $FINAL_LETTER->[$fin] ); } sub chars { my $self = shift; my @array = $self->string( shift ); join( " ", map {$#$_>0 ? $_->[1] : $_->[0]} @array ); } sub string { my $self = shift; return $self->_string(@_) unless $PERL581; my $char = shift; my $flag = utf8::is_utf8( $char ); utf8::encode( $char ) if $flag; my @array = $self->_string( $char ); if ( $flag ) { foreach my $pair ( @array ) { utf8::decode( $pair->[0] ) if defined $pair->[0]; utf8::decode( $pair->[1] ) if defined $pair->[1]; } } @array; } # [UCS-2] AC00-D7A3 # [UTF-8] EAB080-ED9EA3 # EA-ED are appeared only as Hangul's first character. sub _string { my $self = shift; my $src = shift; my $array = []; while ( $src =~ /([\xEA-\xED][\x80-\xBF]{2})|([^\xEA-\xED]+)/sg ) { if ( defined $1 ) { my $pair = [ $1 ]; my $roman = $self->char( $1 ); $pair->[1] = $roman if defined $roman; push( @$array, $pair ); } else { push( @$array, [ $2 ] ); } } for ( my $i = 0 ; $i < $#$array ; $i++ ) { next if ( scalar @{ $array->[$i] } < 2 ); next if ( scalar @{ $array->[ $i + 1 ] } < 2 ); my $this = $array->[$i]->[1]; my $next = $array->[ $i + 1 ]->[1]; my $novowel = 1 unless ( $next =~ /^[aeouiwy]/ ); if ( $this =~ /(tt|pp|jj)$/ && $novowel ) { $array->[$i]->[1] =~ s/(tt|pp|jj)$//; } elsif ( $this =~ /([^n]g|kk)$/ && $novowel ) { $array->[$i]->[1] =~ s/(g|kk)$/k/; } elsif ( $this =~ /(d|j|ch|s?s)$/ && $novowel ) { $array->[$i]->[1] =~ s/(d|j|ch|s?s)$/t/; } elsif ( $this =~ /(b)$/ && $novowel ) { $array->[$i]->[1] =~ s/(b)$/p/; } elsif ( $this =~ /(r)$/ && $novowel ) { $array->[$i]->[1] =~ s/(r)$/l/; $array->[$i+1]->[1] =~ s/^r/l/; } } if ( scalar @$array ) { my $last = $array->[$#$array]; my $this = $last->[1]; if ( $this =~ /(tt|pp|jj)$/ ) { $last->[1] =~ s/(tt|pp|jj)$//; } elsif ( $this =~ /([^n]g|kk)$/ ) { $last->[1] =~ s/(g|kk)$/k/; } elsif ( $this =~ /(d|j|ch|s?s)$/ ) { $last->[1] =~ s/(d|j|ch|s?s)$/t/; } elsif ( $this =~ /(b)$/ ) { $last->[1] =~ s/(b)$/p/; } elsif ( $this =~ /(r)$/ ) { $last->[1] =~ s/(r)$/l/; } } @$array; } # ---------------------------------------------------------------- ;1;