package Unicode::Unihan;

use 5.008001;
use strict;
use warnings;

# Builds a "major.minor" version string (two-digit minor) from the
# revision keyword below.
our $VERSION = do { my @r = (q$Revision: 0.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };

# When true, DESTROY emits a warning for every object that is destroyed.
our $DEBUG = 0;

use Carp;

# The DBM backends AnyDBM_File may use.  This has to be set at compile
# time, before AnyDBM_File itself is loaded.
BEGIN { @AnyDBM_File::ISA = qw(DB_File GDBM_File SDBM_File); }
use AnyDBM_File;
use Fcntl;

# new(%options)
#
# Constructor.  Extra arguments are stored verbatim as key/value pairs in
# the object; load() consults the '-savemem' key.  The per-tag DB files are
# expected in the directory whose name is this file's path with the
# trailing ".pm" removed.  Croaks if that directory does not exist.
#
# No prototype: prototypes are ignored for method calls, and the old
# "($;)" one did not even admit the option list.
sub new {
    my ($class, @options) = @_;

    my $dir = __FILE__;

    # Anchored so that only the trailing suffix is removed; an unanchored
    # match would mangle a path with ".pm" in one of its directory names.
    $dir =~ s/\.pm\z//;

    -d $dir or croak "DB Directory $dir nonexistent!";
    return bless { '_dir_' => $dir, @options }, $class;
}

# load($name)
#
# Makes the table for tag $name available as $self->{$name}, tying it
# read-only to "<dir>/$name.db" unless it is already present.  If the
# object was built with a true '-savemem', every other loaded table (any
# key starting with an uppercase letter) is dropped first.
# Croaks if the DB file is missing or cannot be tied.  Returns $self.
sub load {
    my ($self, $name) = @_;

    if ($self->{'-savemem'}) {
        for my $key (keys %$self) {
            next if $key eq $name;
            delete $self->{$key} if $key =~ /^[A-Z]/;
        }
    }

    unless ($self->{$name}) {
        my $file = $self->{_dir_} . "/$name.db";
        -f $file or croak "There is no DB for $name";

        # Tie a lexical hash and store it only on success.  Tying
        # %{ $self->{$name} } in place would autovivify the slot, so a
        # failed tie would leave behind an empty table that later calls
        # mistake for a loaded one.
        my %table;
        tie %table, 'AnyDBM_File', $file, O_RDONLY, 0444
            or croak "$file: $!";
        $self->{$name} = \%table;
    }

    return $self;
}

# unload(@names)
#
# Drops the named tables, or every loaded table when called without
# arguments.  Only keys starting with an uppercase letter are touched, so
# '_dir_' and '-savemem' style options survive.  Returns $self.
sub unload {
    my ($self, @names) = @_;

    @names = keys %$self unless @names;

    # Iterate over the whole list: a false name such as "0" must not cut
    # the loop short.
    for my $key (@names) {
        next unless defined $key;
        delete $self->{$key} if $key =~ /^[A-Z]/;
    }

    return $self;
}

# Defined explicitly so that object destruction is not routed to AUTOLOAD.
sub DESTROY {
    $DEBUG and warn "$_[0] destroyed!";
}

# AUTOLOAD
#
# Implements $uh->Tag($string) for every tag that has a DB file.  The
# first call loads the table (croaking for unknown tags), installs a real
# method of that name, and dispatches to it.
#
# The installed method returns nothing when called without a string or
# with an empty one.  In list context it returns one entry per character
# of $string; otherwise it returns the entry of the first character.
sub AUTOLOAD {
    my $self = shift;
    my $name = our $AUTOLOAD;
    $name =~ s/.*:://;

    ref $self
        or croak "$name must be called on a Unicode::Unihan object";

    # Validate the tag before anything is installed in the symbol table.
    $self->load($name);

    my $accessor = sub {
        my $self = shift;
        @_ or return;
        my $str = shift;
        return unless defined $str and length $str;

        # The table may have been unloaded (explicitly or by '-savemem'),
        # or $self may be another object that never loaded it, so go
        # through load() on every call.  It is a no-op when the table is
        # already present.
        $self->load($name);
        my $table = $self->{$name};

        return $table->{ ord($str) } unless wantarray;
        return map { $table->{$_} } unpack( "U*", $str );
    };

    {
        no strict 'refs';
        *{$AUTOLOAD} = $accessor;
    }

    return $self->$name(@_);
}

1;
__END__

# Below is stub documentation for your module. You'd better edit it!

=encoding utf8

=head1 NAME

Unicode::Unihan - The Unihan Data Base 5.1.0

=head1 SYNOPSIS

  use Unicode::Unihan;
  my $uh = Unicode::Unihan->new;
  print join("," => $uh->Mandarin("\x{5c0f}\x{98fc}\x{5f3e}")), "\n";

=head1 ABSTRACT

This module provides a user-friendly interface to the Unicode Unihan
Database 5.1.  With this module, the Unihan database is as easy as
shown in the SYNOPSIS above.

=head1 DESCRIPTION

The first thing you do is make the database available.
Just say

  use Unicode::Unihan;
  my $uh = Unicode::Unihan->new;

That's all you have to say.  After that, you can access the database
via $uh-E<gt>I<tag>($string), where I<tag> is the tag in the Unihan
Database, without the 'k' prefix.

=over 2

=item $data = $uh-E<gt>I<tag>($string)

=item @data = $uh-E<gt>I<tag>($string)

The first form (scalar context) returns the Unihan Database entry of
the first character in $string.  The second form (list context)
returns the entry for each character in $string.

  @data = $uh->Mandarin("\x{5c0f}\x{98fc}\x{5f3e}");
  # @data is now ('SHAO4 XIAO3','SI4','DAN4')

  @data = $uh->JapaneseKun("\x{5c0f}\x{98fc}\x{5f3e}");
  # @data is now ('CHIISAI KO O','KAU YASHINAU','TAMA HAZUMU HIKU')

=back

=head1 FIELDS AND THEIR MEANINGS

For a complete list of tags, their meaning, and information on parsing
the value field, see L<https://www.unicode.org/reports/tr38/>.

=head1 SEE ALSO

=over 2

=item L<perlunicode>

=item L<perluniintro>

=item The Unihan Database

L<https://www.unicode.org/reports/tr38/>

=back

=head1 AUTHOR

=over 2

=item of the Module

Dan Kogai E<lt>dankogai@dan.co.jpE<gt>

=item of the Source Data

Unicode, Inc.

=back

=head1 COPYRIGHT AND LICENSE

=over 2

=item of the Module

Copyright 2002-2008 by Dan Kogai, All rights reserved.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=item of the Source Data

Copyright (c) 1996-2008 Unicode, Inc. All Rights reserved.

  Name: Unihan database
  Unicode version: 5.1.0
  Table version: 1.1
  Date: 3 March 2008

=back