# ###################################################################### Otakar Smrz, 2003/01/23
#
# Encodings of Arabic ##########################################################################

# $Id: Arabic.pm,v 1.11 2004/08/21 11:02:10 smrz Exp $

package Encode::Arabic;

use strict;
use warnings;

# Turn the CVS revision keyword into a "major.minor" version string.
our $VERSION = do { my @r = q$Revision: 1.11 $ =~ /\d+/g; sprintf "%d." . "%02d" x $#r, @r };

# Filled in by import() only when the ':modes' option is requested.
our (@ISA, @EXPORT_OK);


# import(CLASS, LIST)
#
# Performs the import as if Encode were used one level before this module.
# If the first option in LIST is ':modes', the enmode() and demode() functions
# are exported into the calling package and the option is dropped. Whatever
# remains of LIST is handed over to Encode's own exporter.
sub import {

    my ($class, @args) = @_;

    if (defined $args[0] and $args[0] eq ':modes') {

        require Exporter;

        @ISA       = qw 'Exporter';
        @EXPORT_OK = qw 'enmode demode';

        # Level 1 is the package that called import(), i.e. the one saying 'use'.
        __PACKAGE__->export_to_level(1, $class, 'enmode', 'demode');

        shift @args;
    }

    require Encode;

    Encode->export_to_level(1, $class, @args);

    return;
}


# NOTE(review): this prepends '..' to @INC for every user of the module; it
# looks like a development-tree convenience -- confirm before removing.
use lib '..';

use Encode::Arabic::ArabTeX;
use Encode::Arabic::ArabTeX::RE;
use Encode::Arabic::ArabTeX::ZDMG;
use Encode::Arabic::ArabTeX::ZDMG::RE;
use Encode::Arabic::Buckwalter;


# _encoding_object(ENC)
#
# Returns the encoding object that Encode::find_encoding() yields for ENC.
# Croaks with "Unknown encoding 'ENC'" if ENC is undefined or not found.
sub _encoding_object {

    my ($enc) = @_;

    # import() may never have run, e.g. after a plain 'require Encode::Arabic'.
    require Encode;

    my $obj = defined $enc ? Encode::find_encoding($enc) : undef;

    unless (defined $obj) {

        require Carp;
        Carp::croak("Unknown encoding '" . (defined $enc ? $enc : '') . "'");
    }

    return $obj;
}


# enmode(ENC, LIST)
#
# Calls the enmode() method of the encoding object found for ENC, passing LIST
# through, and returns whatever that method returns. Croaks if ENC is unknown.
# The ($@) prototype is kept so that existing call sites parse as before.
sub enmode ($@) {

    my $enc = shift;

    return _encoding_object($enc)->enmode(@_);
}


# demode(ENC, LIST)
#
# Calls the demode() method of the encoding object found for ENC, passing LIST
# through, and returns whatever that method returns. Croaks if ENC is unknown.
# The ($@) prototype is kept so that existing call sites parse as before.
sub demode ($@) {

    my $enc = shift;

    return _encoding_object($enc)->demode(@_);
}


1;

__END__


=head1 NAME

Encode::Arabic - Perl extension for encodings of Arabic

=head1 REVISION

    $Revision: 1.11 $        $Date: 2004/08/21 11:02:10 $


=head1 SYNOPSIS

    use Encode::Arabic;                 # imports just like 'use Encode' even with options would

    while ($line = <>) {                # renders the ArabTeX notation for Arabic both in the ..
        print encode 'utf8', decode 'arabtex', $line;          # .. Arabic script proper and the
        print encode 'utf8', decode 'arabtex-zdmg', $line;     # ..
Latin phonetic transcription
    }

    # 'use Encode::Arabic ":modes"' would export the functions controlling the conversion modes

    Encode::Arabic::demode 'arabtex', 'default';
    Encode::Arabic::enmode 'buckwalter', 'full', 'xml', 'strip off kashida';

    # Arabic in lower ASCII transliterations <--> Arabic script in Perl's internal encoding

    $string = decode 'ArabTeX', $octets;
    $octets = encode 'Buckwalter', $string;

    $string = decode 'Buckwalter', $octets;
    $octets = encode 'ArabTeX', $string;

    # Arabic in lower ASCII transliterations <--> Latin phonetic transcription, Perl's utf8

    $string = decode 'Buckwalter', $octets;
    $octets = encode 'ArabTeX', $string;

    $string = decode 'ArabTeX-ZDMG', $octets;
    $octets = encode 'utf8', $string;


=head1 DESCRIPTION

This module is a wrapper for various implementations of the encoding systems used for the Arabic
language and covering even some non-Arabic extensions to the Arabic script. The included modules
fit in the philosophy of L<Encode::Encoding> and can be used directly with the L<Encode> module.

=head2 LIST OF ENCODINGS

=over

=item ArabTeX

ArabTeX multi-character notation for Arabic / Perl's internal format for the Arabic script

L<Encode::Arabic::ArabTeX>, uses L<Encode::Mapper>

=item ArabTeX-RE

Deprecated method using sequential regular-expression substitutions. Limited in scope over the
ArabTeX notation and non-efficient in data processing, still, not requiring the L<Encode::Mapper>
module.

L<Encode::Arabic::ArabTeX::RE>

=item ArabTeX-ZDMG

ArabTeX multi-character notation for Arabic / Perl's internal format for the Latin phonetic
transcription in the ZDMG style

L<Encode::Arabic::ArabTeX::ZDMG>, uses L<Encode::Mapper>

=item ArabTeX-ZDMG-RE

Deprecated method using sequential regular-expression substitutions. Limited in scope over the
ArabTeX notation and non-efficient in data processing, still, not requiring the L<Encode::Mapper>
module.

L<Encode::Arabic::ArabTeX::ZDMG::RE>

=item Buckwalter

Buckwalter one-to-one notation for Arabic / Perl's internal format for the Arabic script

L<Encode::Arabic::Buckwalter>

=back

There are generic aliases to these provided by L<Encode>. Case does not matter and all characters
of the class C<[ _-]> are interchangeable.
Note that the standard L<Encode> module already deals with several other single-byte encoding
schemes for Arabic popular with whichever operating system, be it *n*x, Windows, DOS or Macintosh.
See L<Encode::Supported> and L<Encode::Byte> for their identification names and aliases.

=head2 EXPORTS & MODES

The module exports as if C<use Encode> also appeared in the package. The C<import> options are
just delegated to L<Encode> and imports performed properly, with the exception of the C<:modes>
option coming first in the list. In such a case, the following functions will be introduced into
the namespace of the using package:

=over

=item enmode ($enc, @list)

Calls the C<enmode> method associated with the given C<$enc> encoding, and passes the C<@list> to
it. The idea is similar to the C<encode> functions and methods of the L<Encode> and
L<Encode::Encoding> modules, respectively. Used for control over the modes of conversion.

=item demode ($enc, @list)

Analogous to C<enmode>, but calling the appropriate C<demode> method. See the individual
implementations of the listed encodings.

=back


=head1 SEE ALSO

Encode::Arabic Online Interface L

Klaus Lagally's ArabTeX L

Tim Buckwalter's Qamus L

Arabeyes Arabic Unix Project L

L, L, L, L, L

L, L

L, L, L

L

L


=head1 AUTHOR

Otakar Smrz, L

    eval { 'E' . 'smrz' . "\x40" . ( join '.', qw 'ckl mff cuni cz' ) . 'E' }

Perl is also designed to make the easy jobs not that easy ;)


=head1 COPYRIGHT AND LICENSE

Copyright 2003, 2004 by Otakar Smrz

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.


=cut