package UnicodeCD;

use strict;
use warnings;

our $VERSION = '0.1';

require Exporter;

our @ISA = qw(Exporter);
our @EXPORT_OK = qw(charinfo charblock charscript
                    charblocks charscripts
                    charinrange
                    compexcl
                    casefold casespec);

use Carp;
use File::Spec;

=head1 NAME

UnicodeCD - Unicode character database

=head1 SYNOPSIS

    use UnicodeCD 'charinfo';
    my $charinfo   = charinfo($codepoint);

    use UnicodeCD 'charblock';
    my $charblock  = charblock($codepoint);

    use UnicodeCD 'charscript';
    my $charscript = charscript($codepoint);

=head1 DESCRIPTION

The UnicodeCD module offers a simple interface to the Unicode Character
Database.

=cut

# Filehandle slots, one per database file.  Each starts out undefined;
# openunicode() fills a slot in when it is handed a reference to it.
my $UNICODEFH;
my $BLOCKSFH;
my $SCRIPTSFH;
my $VERSIONFH;
my $COMPEXCLFH;
my $CASEFOLDFH;
my $CASESPECFH;

# openunicode(\$fh, @path)
#
# Opens a database file for reading, unless the handle is already set.
#
#   $rfh   reference to the scalar that holds (or will hold) the handle
#   @path  path components of the file, relative to a "unicode"
#          directory located directly under one of the @INC entries
#
# Each @INC directory is tried in order and the first file that opens
# wins.  Returns the full path of the file that was opened.  Croaks if
# the file cannot be opened under any @INC directory.
#
# NOTE(review): when $$rfh is already defined no search is made and the
# return value is undef, because $f is only ever assigned inside the
# search loop.  Confirm that callers do not treat that as a failure.
sub openunicode {
    my ($rfh, @path) = @_;
    my $f;
    unless (defined $$rfh) {
        for my $d (@INC) {
            $f = File::Spec->catfile($d, "unicode", @path);
            # Explicit read mode: the path is built from @INC, so it must
            # never be parsed for mode or pipe characters.
            last if open($$rfh, "<", $f);
            undef $f;       # not here; leave $f undefined unless we succeed
        }
        croak __PACKAGE__, ": failed to find ",
              File::Spec->catfile(@path), " in @INC"
            unless defined $f;
    }
    return $f;
}

=head2 charinfo

    use UnicodeCD 'charinfo';

    my $charinfo = charinfo(0x41);

charinfo() returns a reference to a hash that has the following fields
as defined by the Unicode standard:

    key

    code             code point with at least four hexdigits
    name             name of the character IN UPPER CASE
    category         general category of the character
    combining        classes used in the Canonical Ordering Algorithm
    bidi             bidirectional category
    decomposition    character decomposition mapping
    decimal          if decimal digit this is the integer numeric value
    digit            if digit this is the numeric value
    numeric          if numeric is the integer or rational numeric value
    mirrored         if mirrored in bidirectional text
    unicode10        Unicode 1.0 name if existed and different
    comment          ISO 10646 comment field
    upper            uppercase equivalent mapping
    lower            lowercase equivalent mapping
    title            titlecase equivalent mapping

    block            block the character belongs to (used in \p{In...})
    script           script the character belongs to

If no match is found, a reference to an empty hash
is returned.

The C<block> property is the same as returned by charblock().  It is
not defined in the Unicode Character Database proper (Chapter 4 of the
Unicode 3.0 Standard) but instead in an auxiliary database (Chapter 14
of TUS3).  Similarly for the C<script>