##########################################################################
## All portions of this code are copyright (c) 2003,2004 nethype GmbH   ##
##########################################################################
## Using, reading, modifying or copying this code requires a LICENSE    ##
## from nethype GmbH, Franz-Werfel-Str. 11, 74078 Heilbronn,            ##
## Germany. If you happen to have questions, feel free to contact us at ##
## license@nethype.de.                                                  ##
##########################################################################

package PApp::I18n;

=head1 NAME

PApp::I18n - internationalisation support for PApp

=head1 SYNOPSIS

   use PApp::I18n;
   # nothing exported by default

   my $translator = PApp::I18n::open_translator("/libdir/i18n/myapp", "de");
   my $table = $translator->get_table("uk,de,en"); # will return de translator
   print $table->gettext("yeah"); # better define __ and N_ functions

=head1 DESCRIPTION

This module provides basic translation services, .po-reader and writer
support and text and database scanners to identify tagged strings.

=head2 Anatomy of a Language/Locale ID

A "language" can be designated by either a free-form-string (that doesn't
match the following formal definition) or a language-region code that must
match the following regex:

   /^ ([a-z][a-z][a-z]?) (?:[-_] ([a-z][a-z][a-z]?))? (?:\.(\S+))? $/ix

Its parts, from left to right, are:

   ([a-z][a-z][a-z]?)   two or three letter code
   (?: ... )?           optionally followed by
   [-_]                 "-" or "_" as separator
   ([a-z][a-z][a-z]?)   two or three letter code
   (?: ... )?           optionally followed by
   \.                   "." as separator
   (\S+)                character encoding

There is no charset indicator, as only utf-8 is supported currently.

The first part must be a two or three letter code from iso639-2/t (alpha2
or alpha3), optionally followed by the two or three letter country/region
code from iso3166-1 and -2. Numeric region codes might be supported one
day.

=cut

no warnings;
use utf8;
no bytes;

use File::Glob;

use Convert::Scalar 'weaken';
use Convert::Scalar ':utf8';

use PApp::Exception;
use PApp::SQL;
use PApp::Config;

BEGIN {
   use base 'Exporter';

   $VERSION = 1.43;
   @EXPORT = qw();
   @EXPORT_OK = qw(
      open_translator
      scan_file scan_init scan_end scan_field
      export_dpo
      normalize_langid translate_langid locale_charsets
   );

   require XSLoader;
   XSLoader::load PApp::I18n, $VERSION;
}

# The __DATA__ section consists of three tables separated by lines starting
# with __SPLIT__: the iso3166 table, the iso639 table and the
# locale => charsets table (in that order). The whole section is slurped
# once at load time and flagged as utf-8.
my ($iso3166, $iso639, $locale) = do {
   local $/;
   split /^__SPLIT__/m, utf8_on <DATA>;
};

# Accessors for the embedded tables. The regexes expect tab-separated
# columns: "alpha3 TAB alpha2 TAB name" for the iso tables and
# "locale TAB charset,charset,..." for the charset table.
{
   # all iso639 rows, as a reference to an array of (up to three) columns
   sub iso639          { [map [split /\s+/, $_, 3], split /\n/, $iso639 ] }
   # map a two-letter language code to its three-letter form (or return it unchanged)
   sub iso639_a2_a3    { $iso639  =~ /^(...)\t\Q$_[0]\E\t/m       ? $1 : $_[0] }
   # map a three-letter language code to its name (false if unknown)
   sub iso639_a3_name  { $iso639  =~ /^\Q$_[0]\E\t[^\t]*\t(.*)$/m and $1 }
   # all iso3166 rows, as a reference to an array of (up to three) columns
   sub iso3166         { [map [split /\s+/, $_, 3], split /\n/, $iso3166] }
   # map a two-letter country code to its three-letter form (or return it unchanged)
   sub iso3166_a2_a3   { $iso3166 =~ /^(...)\t\Q$_[0]\E\t/m       ? $1 : $_[0] }
   # map a three-letter country code to its name (false if unknown)
   sub iso3166_a3_name { $iso3166 =~ /^\Q$_[0]\E\t[^\t]*\t(.*)$/m and $1 }
   # map a normalized locale to its comma-separated charset list (false if unknown)
   sub locale2charsets { $locale  =~ /^\Q$_[0]\E\t(.*)$/m         and $1 }
}

our $i18ndir;

=over 4

=item set_base $path

Set the default i18n directory to C<$path>. This must be done before any
calls to C<translate_langid> or when using relative i18n paths.

=cut

sub set_base($) {
   $i18ndir = shift;
}

=item normalize_langid $langid

Normalize the language and country id into its three-letter form, if
possible. This requires a grep through a few kb of text but the result is
cached. The special language code "*" is translated to "mul".

Strings that do not look like a language-region code are returned
lowercased but otherwise unchanged.

=cut

our %nlid_cache = ();

my $locale_regex = qr/^
   ([a-z][a-z][a-z]?)
   (?:[-_] ([a-z][a-z][a-z]?))?
   (?:\. (\S+))?
$/ix;

sub normalize_langid($) {
   use bytes;
   $nlid_cache{$_[0]} ||= do {
      local $_ = lc $_[0];
      my $result;

      if ($_ eq "*") {
         # "*" can never match $locale_regex (letters only), so it has to be
         # handled before the regex is tried.
         $result = "mul";
      } elsif ($_ =~ $locale_regex) {
         my ($lang, $country, $encoding) = (lc $1, lc $2, lc $3);

         $lang = iso639_a2_a3 ($lang) if 3 > length $lang;
         $lang = "heb" if $lang eq "iw"; # "iw" is the old code, which has not been reused so far
         $lang = "yid" if $lang eq "ji"; # "ji" is the old code, which has not been reused so far

         if ($country ne "") {
            $country = iso3166_a2_a3 ($country) if 3 > length $country;
            $lang .= "_$country";
         }

         $lang = "zha" if $lang eq "zho_twn"; # new code "Zhuang" for "Chinese Traditional"
         $lang = "zho" if $lang eq "zho_chn"; # old code "Chinese" here means "Chinese Simplified"

         $lang .= ".$encoding" if $encoding ne "";

         $result = $lang;
      } else {
         # free-form language name: only lowercased
         $result = $_;
      }

      $result;
   }
}

=item translate_langid $langid[, $langid]

Decode the first C<langid> into a description of itself and translate it
into the language specified by the second C<langid> (the latter does not
work yet). The result of this function is being cached.

Returns C<undef> if the language id cannot be decoded.

=cut

our %tlid_cache = ();
our $tlid_iso3166;
our $tlid_iso639;

# perl does STRANGE things to characters when using
# ucfirst unarmed (like duplicating han characters etc...)
sub _ucfirst {
   local $_ = shift;
   substr($_,0,1) =~ y[abcdefghijklmnopqrstuvwxyzàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ]
                      [ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ];
   $_;
}

sub translate_langid($;$) {
   # NOTE: the do block must not "return": that would leave the sub before
   # the ||= assignment happens and nothing would ever be cached.
   $tlid_cache{"$_[0]\x00$_[1]"} ||= do {
      my $langid = normalize_langid $_[0];
      my $description;

      use bytes;
      if ($langid =~ $locale_regex) {
         no bytes;
         my ($lang, $country) = ($1, $2);

         # the names are looked up in (and translated into) the language
         # being described, i.e. $_[0], not the second argument
         $lang = iso639_a3_name ($lang);
         $tlid_iso639 ||= open_translator ("iso639", "en");
         $lang = _ucfirst $tlid_iso639->get_table ($_[0])->gettext ($lang);

         if ($country) {
            $country = iso3166_a3_name ($country);
            $tlid_iso3166 ||= open_translator ("iso3166", "en");
            $country = _ucfirst $tlid_iso3166->get_table ($_[0])->gettext ($country);

            $description = "$lang ($country)" if $country;
         } elsif ($lang) {
            $description = $lang;
         }
      }

      $description;
   }
}

=item locale_charsets $locale

Returns a list of character sets that might be good to use for this
locale. This definition is necessarily imprecise.
The charsets returned should be considered to be in priority order, i.e.
the first charset is the best. The intention of this function is to
provide a list of character sets to try when outputting html text (you can
output any html text in any encoding supporting html's active characters,
so this is indeed a matter of taste).

If the locale contains a character set it will be the first in the
returned list. The other charsets are taken from a list (see the source of
this module for details).

Here are some examples of what you might expect:

   de          => iso-8859-1 iso-8859-15 cp1252 utf-8
   rus_ukr     => koi8-u iso-8859-5 cp1251 iso-ir-111 cp866 koi8-r iso-8859-5 cp1251 iso-ir-111 cp866 koi8-u utf-8
   ja_JP.UTF-8 => utf-8 euc-jp sjis iso-2022-jp jis7 utf-8

This function can be slow and does NOT cache any results.

=cut

sub locale_charsets($) {
   my $normalized = normalize_langid $_[0];
   my @candidates;

   use bytes; # DEVEL9021 workaround against segfaults on regex match
   if ($normalized =~ $locale_regex) {
      my ($lang, $country, $charset) = (lc $1, lc $2, lc $3);

      # an explicitly requested charset always comes first,
      # followed by the language_country and the language-only entries
      push @candidates, $charset if $charset ne "";
      push @candidates, split /,/, locale2charsets "${lang}_$country";
      push @candidates, split /,/, locale2charsets $lang;
   } else {
      # free-form locale name: look it up as given
      push @candidates, split /,/, locale2charsets $_[0];
   }

   # utf-8 is always offered as the last resort
   (@candidates, "utf-8");
}

# every translator object ever created, held as weak references
our @table_registry;

END {
   # work around a bug perl5.6
   # it seems that global destruction (which has undefined order)
   # causes killbackrefs to fail because the weak ref is already destroyed
   undef @table_registry;
}

=back

=head2 Translation Support

=over 4

=item open_translator $path, lang1, lang2....

Open an existing translation directory. A translation directory can
contain any number of language translation tables with filenames of the
form "language.dpo". Since the translator cannot guess in which language
the source has been written you have to specify this by adding additional
language names.
=cut

sub open_translator {
   my $path = shift;

   PApp::I18n->new (path => $path, langs => [@_]);
}

# Constructor. Takes key => value pairs; the code reads "path" (the
# translation directory, relative to the directory given to set_base unless
# it starts with a slash) and "langs" (array ref of additional languages).
# Every "*.dpo" file found in the directory adds its basename to the list
# of languages, which is then normalized and freed of duplicates.
sub new {
   my $class = shift;
   my $self = bless { @_ }, $class;

   $self->{path} = "$i18ndir/$self->{path}" unless $self->{path} =~ /^\//;

   # a missing or unreadable directory is not an error, it just adds nothing
   opendir local *DIR, "$self->{path}"
      and push @{$self->{langs}}, grep s/\.dpo$//, readdir *DIR;

   my %seen;
   @{$self->{langs}} = grep !$seen{$_}++, map normalize_langid($_), @{$self->{langs}};

   # remember the object for flush_cache, but do not keep it alive
   push @table_registry, $self;
   weaken($table_registry[-1]);

   $self;
}

=item $translator->langs

Return all languages supported by this translator (in normalized form). Can be
used to create language-selectors, for example.

=cut

sub langs {
   @{$_[0]->{langs}};
}

#=item expand_lang langid, langid... [internal utility function]
#
#Try to identify the closest available language. #fixme#
#
#=cut

# expand_lang $wanted, @available
#
# $wanted is a comma-separated list of language ids in order of preference,
# @available the (normalized) languages to choose from. For each wanted
# language the exact id is tried first, then the id with everything after
# the first "_" removed, then any available language starting with that
# prefix and "_". Returns the empty list if nothing matches.
sub expand_lang {
   my $langs = shift;
   my %available; @available{@_} = @_;

   for my $wanted (split /,/, $langs) {
      # trim BEFORE normalizing: " de" does not look like a language id and
      # would otherwise never be mapped to its three-letter form
      (my $id = $wanted) =~ s/^\s+//;
      $id =~ s/\s+$//;

      my $lang = normalize_langid $id;
      $lang =~ y/-/_/;
      next unless $lang;

      return $lang if exists $available{$lang};

      $lang =~ s/_.*$//;
      return $lang if exists $available{$lang};

      # free-form ids may contain regex metacharacters, so quote them
      for (keys %available) {
         return $_ if /^\Q$lang\E_/;
      }
   }

   ();
}

=item $table = $translator->get_table($languages)

Find and return a translator table for the language that best matches the
C<$languages>. This function always succeeds by returning a dummy table
if no (physical) table can be found. This function is very fast in the
general case.
=cut

sub get_table {
   # results are cached per object, keyed by the unparsed language list
   $_[0]->{table_cache}{$_[1]} ||= do {
      my ($self, $langs) = @_;

      # first, map the "langs" into a real language code
      # (undef if none of the requested languages is available)
      my $lang = expand_lang ($langs, @{$self->{langs}});

      # then map the lang into the corresponding .dpo file
      my $db = $self->{db}{$lang};
      unless ($db) {
         my $path = "$self->{path}/$lang.dpo";
         # a false path is passed when there is no readable file
         $self->{db}{$lang} = $db = PApp::I18n::Table->new ((-r $path) && $path, $lang);
         $db or fancydie "unable to open translation table '$lang'", "in directory '$self->{path}'";
      }

      $db;
   }
}

=item $translation = $table->gettext($msgid)

Find the translation for $msgid, or return the original string if no
translation is found. If the msgid starts with the two characters "\"
and "{", then these characters and all remaining characters until the
closing '}' are skipped before attempting a translation. If you do want
to include these two characters at the beginning of the string, use the
sequence "\{\{". This can be used to specify additional arguments to some
translation steps (like the language used). Here are some examples:

   string       => translation
   \{\string    => \translation
   \{\{string   => \{translation
   \{}string    => translation

To ensure that the string is translated "as is" just prefix it with "\{}".

=item $lang = $table->lang

Return the language this translation table contains.

=cut

=item flush_cache

Flush the translation table cache. This is rarely necessary, translation
hash files are not written to. This can be used to ensure that new calls
to C<get_table> get the updated tables instead of already opened ones.
=cut

# Called as a method it drops the opened tables of that translator only,
# called as a function it does so for every translator still alive. In both
# cases the langid normalization/translation caches are emptied as well.
sub flush_cache {
   if (@_) {
      my $self = shift;
      delete $self->{db};
      delete $self->{table_cache};
   } else {
      my @tables = @table_registry;
      @table_registry = ();

      for (@tables) {
         # destroyed translators leave an undef entry behind, drop those
         if ($_) {
            push @table_registry, $_;
            # the registry must only hold weak references (as in new),
            # otherwise a flush would keep every translator alive forever
            weaken($table_registry[-1]);
            $_->flush_cache;
         }
      }
   }

   # these are hashes; assigning to the scalars of the same name would
   # leave the caches untouched
   %tlid_cache = ();
   %nlid_cache = ();
}

#############################################################################

use PApp::SQL;

=back

=head2 Scanning Support

As of yet undocumented

=over 4

=cut

# Escape a string for use inside a double-quoted .po string: backslash,
# double quote, newline, carriage return and tab get their two-character
# escapes, remaining control characters (0x00-0x1f, 0x80-0x9f) become \xNN.
sub quote($) {
   local $_ = shift;
   utf8_upgrade $_; #d# DEVEL7952
   s/\\/\\\\/g; # must come first, the other rules introduce backslashes
   s/\"/\\"/g;
   s/\n/\\n/g;
   s/\r/\\r/g;
   s/\t/\\t/g;
   s/([\x00-\x1f\x80-\x9f])/sprintf "\\x%02x", ord $1/ge;
   #s/([\x{0100}-\x{ffff}])/sprintf "\\x{%04x}", ord($1)/ge;
   $_;
}

# Reverse of quote: expand \" \n \r \t \xNN \x{N...} and \\. A backslash
# followed by any other character is replaced by the empty string. The
# replacement for each alternative is computed by its (?{ }) block and
# picked up via $^R.
sub unquote($) {
   local $_ = shift;
   utf8_upgrade $_; #d# DEVEL7952
   s{\\(?:
      "                       (?{ "\"" })
    | n                       (?{ "\n" })
    | r                       (?{ "\r" })
    | t                       (?{ "\t" })
    | x ([0-9a-fA-F]{2,2})    (?{ chr hex $1 })
    | x \{([0-9a-fA-F]+)\}    (?{ chr hex $2 })
    | \\                      (?{ "\\" })
    | (.)                     (?{ "" })
   )}{
      $^R
   }gex;
   $_;
}

# Clean up the translation database: remove translations whose language
# equals the language of their msgid, remove msgids with an empty context
# and finally remove translations that no longer have a msgid.
sub reorganize_i18ndb {
   local $PApp::SQL::DBH = PApp::Config::DBH;

   my $st = sql_exec "select i.nr, s.lang from msgid i, msgstr s where i.nr = s.nr and i.lang = s.lang";
   while (my ($nr, $lang) = $st->fetchrow_array) {
      sql_exec "delete from msgstr where nr = ? and lang = ?", $nr, $lang;
   }

   # and non-context msgstr's
   sql_exec "delete from msgid where context = ''";

   # delete msgid-less msgstr's
   $st = sql_exec "select s.nr from msgstr s left join msgid i using (nr) where i.nr is null";
   while (my ($nr) = $st->fetchrow_array) {
      sql_exec "delete from msgstr where nr = ?", $nr;
   }

   return;
}

=item \%trans = fuzzy_translation $string, [$domain]

Try to find a translation for the given string in the given domain (or
globally) by finding the most similar string already in the database and
return its translation(s).
=cut

sub fuzzy_translation {
   my ($string, $domain) = @_;

   local $PApp::SQL::DBH = PApp::Config::DBH;

   require String::Similarity;

   # all bound columns are lexicals, nothing leaks into package globals
   my ($st, $nr, $id, $lang, $msg);

   if ($domain) {
      $st = sql_exec \($nr, $id, $lang, $msg),
                     "select i.nr, i.id, s.lang, s.msg from msgid as i, msgstr as s where i.nr = s.nr and domain = ? and flags & 1 != 0 order by nr",
                     $domain;
   } else {
      $st = sql_exec \($nr, $id, $lang, $msg),
                     "select i.nr, i.id, s.lang, s.msg from msgid as i, msgstr as s where i.nr = s.nr and flags & 1 != 0 order by nr";
   }

   my %best;  # language => best similarity seen so far
   my %trans; # language => translation of the most similar msgid

   # we use a minimum similarity of 0.6
   while ($st->fetch) {
      my $similarity = String::Similarity::fstrcmp($string, $id, $best{$lang} ||= 0.6);

      if ($similarity >= $best{$lang}) {
         $trans{$lang} = utf8_on $msg;
         $best{$lang} = $similarity;
      }
   }

   \%trans;
}

# our instead of my due to mod_perl bugs
our %scan_msg;
our $scan_app;

=item scan_init $domain

Start a scan for the given translation domain: forget the messages
collected so far and reset the context of all msgids of the domain.

=cut

sub scan_init {
   ($scan_app) = @_;
   utf8_upgrade $scan_app;
   %scan_msg = ();
   sql_exec "update msgid set context = '' where domain = ?", $scan_app;
}

# scan_add $lang, $id, $context
#
# Remember that msgid $id (written in language $lang) occurs at $context.
sub scan_add {
   my ($lang, $id, $context) = @_;
   utf8_off $id; # DEVEL9916, to keep perl from killing characters without use unicode::strict
   push @{$scan_msg{$lang}{$id}}, $context;
}

=item scan_str $prefix, $string, $lang

Scan C<$string> for strings marked with C<__"..."> or C<N_"..."> (with
optional parentheses around the string) and register them. C<$prefix> and
the line number are used to build the context of each string found.

=cut

sub scan_str($$$) {
   my ($prefix, $string, $lang) = @_;
   my $line = 1;

   utf8_upgrade $string; # DEVEL7952

   # macintoshes not supported, but who cares ;-<
   for (;;) {
      if ($string =~ m/\G
            (
               (?>
                  [^\012N_]+
                | [N_][^\012_]
                | [N_]_[^\012"(]
               )*
               [N_]_ \(?"
               (
                  (?> [^"\\]+ | \\. )+
               )
               "\)?
               (?>
                  [^\012N_]+
                | [N_][^\012_]
                | [N_]_[^\012"(]
               )*
            )
         /sgcx) {
         # $1 is the surrounding text (used as context), $2 the quoted msgid
         my ($context, $id) = ($1, $2);
         scan_add ($lang, PApp::I18n::unquote ($id), "$prefix:$line $context");
         $line += $context =~ y%\012%%;
      } elsif ($string =~ m/\G([^\012]*)\012/sgc) {
         # nothing marked on this line, skip it
         $line++;
      } else {
         last;
      }
   }
}

=item scan_file $path, $lang

Read the file C<$path> and scan its contents with C<scan_str>.

=cut

sub scan_file($$) {
   my ($path, $lang) = @_;

   print "file '$path' for '$scan_app' in '$lang'\n";

   open my $fh, "<", $path
      or fancydie "unable to open file for scanning", "$path: $!";

   # slurp the whole file
   local $/;
   my $file = <$fh>;
   utf8_on $file; #d# DEVEL7952

   scan_str ($path, $file, $lang);
}

=item scan_field $dsn, $field, $style, $lang

Scan the database column C<$field> (given as "table.column"). The members
of set and enum columns are registered directly. For all other columns
every row is either scanned with C<scan_str> (style "code", or style
"auto" when the value contains a marked string) or registered as a whole.

=cut

sub scan_field {
   my ($dsn, $field, $style, $lang) = @_;
   my $table;

   print "field $field for '$scan_app' in '$lang'\n";

   my $db = $dsn->checked_dbh;

   ($table, $field) = split /\./, $field;

   # NOTE(review): $table and $field are interpolated into the SQL below,
   # so they must come from trusted configuration only.
   my $st = sql_exec $db, "show columns from $table like ?", $field;
   my $type = $st->fetchrow_arrayref;
   defined $type or fancydie "no such table", "$table.$field";
   $type = utf8_on $type->[1];

   if ($type =~ /^(set|enum)\('(.*)'\)$/) {
      # save the captures before running any other regex operation
      my ($kind, $members) = ($1, $2);

      for (split /','/, $members) {
         scan_add $lang, $_, "DB:".$dsn->dsn.":$table:$field:$kind";
      }
   } else {
      my $row;
      my $rows = sql_exec $db, \my($msgid), "select $field from $table";
      my $prefix = $dsn->dsn."/$table.$field";

      while ($rows->fetch) {
         utf8_on $msgid;
         $row++;

         if ($style eq "code"
             or ($style eq "auto"
                 and $msgid =~ /[_]_"(?:[^"\\]+|\\.)+"/s)) {
            scan_str "$prefix:$row $msgid", $msgid, $lang;
         } else {
            scan_add $lang, $msgid, "$prefix:$row";
         }
      }
   }
}

=item scan_end

Finish a scan: store the contexts of all messages found, create msgids
for new messages (together with fuzzy translations taken from similar
messages) and flag the translations of messages that were not found
anymore.

=cut

sub scan_end {
   local $PApp::SQL::DBH = PApp::Config::DBH;

   my $st0 = $PApp::SQL::DBH->prepare("select nr from msgid where id = ? and domain = ? and lang = ?");
   my $st1 = $PApp::SQL::DBH->prepare("update msgid set context = ? where nr = ?");

   while (my ($lang, $v) = each %scan_msg) {
      while (my ($msg, $context) = each %$v) {
         $context = join "\n", @$context;
         utf8_on $msg; utf8_on $lang; utf8_upgrade $context;

         $st0->execute($msg, $scan_app, $lang);
         my $nr = $st0->fetchrow_arrayref;

         if ($nr) {
            # known message, just update where it is being used
            $st1->execute($context, $nr->[0]);
            $st1->finish;
         } else {
            $nr = sql_insertid
                     sql_exec "insert into msgid (id, domain, lang, context) values (?, ?, ?, ?)",
                              $msg, $scan_app, $lang, $context;

            # now enter existing, similar, translations
            my $trans = fuzzy_translation ($msg, $scan_app);
            while (my ($lang, $str) = each %$trans) {
               sql_exec "insert into msgstr (nr, lang, flags, msg) values (?, ?, 'fuzzy', ?)",
                        $nr, $lang, $str;
            }
         }
      }
   }

   # msgids that still have the empty context set by scan_init were not found
   my $st = sql_exec \my($nr), "select nr from msgid where domain = ? and context = ''", $scan_app;
   while ($st->fetch) {
      sql_exec "update msgstr set flags = flags | 4 where nr = ?", $nr;
   }

   ($scan_app, $scan_lang, %scan_msg) = ();
}

=item export_dpo $domain, $path, [$userid, $groupid, $attr]

Export translation domain C<$domain> in binary hash format to directory
C<$path>, creating it if necessary.

If given, the directory and the files are chowned to C<$userid> and
C<$groupid>, and the files get the permissions C<$attr> (the directory
additionally gets the execute bits).

=cut

# the prototype must allow five arguments, otherwise $attr could never be passed
sub export_dpo($$;$$$) {
   my ($domain, $path, $uid, $gid, $attr) = @_;

   local $PApp::SQL::DBH = PApp::Config::DBH;

   mkdir $path, defined $attr ? $attr | 0111 : 0755;
   chown $uid, $gid, $path if defined $uid;

   # remove all existing tables first
   unlink for glob "$path/*.dpo";

   for my $lang (sql_fetchall "select distinct s.lang from msgid i, msgstr s where i.domain = ? and i.nr = s.nr", $domain) {
      my $pofile = "$path/$lang.dpo";

      my $st = sql_exec \my($id, $msg),
                        "select id, msg from msgid i, msgstr s where i.domain = ? and i.nr = s.nr and s.lang = ? and s.flags & 1 and msg != '' order by 2",
                        $domain, $lang;

      my $rows = $st->rows;
      if ($rows) {
         # the table size is the smallest prime >= 4/3 of the number of rows
         my $prime = int ($rows * 4 / 3) | 1;
         {
            use integer;

            outer:
            for (;; $prime += 2) {
               my $max = int sqrt $prime;
               for (my $i = 3; $i <= $max; $i += 2) {
                  next outer unless $prime % $i;
               }
               last;
            }
         }

         # write to a temporary file and rename it into place when done
         my $dpo = PApp::I18n::DPO_Writer->new ("$pofile~", $prime);
         while ($st->fetch) {
            $dpo->add(utf8_on $id,utf8_on $msg) if $id ne $msg;
         }
         undef $dpo; # destroy the writer before touching the file

         chown $uid, $gid, "$pofile~" if defined $uid;
         chmod $attr, "$pofile~" if defined $attr;
         rename "$pofile~", $pofile;

         push @files, $pofile; # package global, kept for compatibility
      } else {
         unlink $pofile;
      }
   }
}

package PApp::I18n::PO_Reader;

use Carp;
use IO::Handle; # for ->getline on perls that do not load it on demand

=back

=head2 PO Reading and Writing

CLASS PApp::I18n::PO_Reader

This class can be used to read serially through a .po file. (where "po
file" is about the same thing as a standard "Portable Object" file from
the NLS standard developed by Uniforum).

=over 4

=item $po = new PApp::I18n::PO_Reader $pathname

Opens the given file for reading. Croaks if the file cannot be opened.

=cut

sub new {
   my ($class, $path) = @_;
   my $self = bless { path => $path }, $class;

   open $self->{fh}, "<", $path
      or croak "unable to open '$path' for reading: $!";

   $self;
}

=item ($msgid, $msgstr, @comments) = $po->next;

Read the next entry. Returns nothing on end-of-file.
=cut

# Return the next non-blank line (chomped) without consuming it, or undef
# at end-of-file.
sub peek {
   my $self = shift;

   # test definedness, not truth: a buffered line "0" must not be skipped
   unless (defined $self->{line}) {
      do {
         chomp ($self->{line} = $self->{fh}->getline);
         Convert::Scalar::utf8_on ($self->{line});
      } while defined $self->{line} && $self->{line} =~ /^\s*$/;
   }

   $self->{line};
}

# Return the next non-blank line and consume it.
sub line {
   my $self = shift;
   $self->peek;
   delete $self->{line};
}

# Croak with the given message, the path and the current input line number.
sub perr {
   my $self = shift;
   croak "$_[0], at $self->{path}:$.";
}

sub next {
   my $self = shift;
   my ($id, $str, @c);

   # leading comment lines
   while ($self->peek =~ /^\s*#(.*)$/) {
      push @c, $1;
      $self->line;
   }

   if ($self->peek =~ /^\s*msgid/) {
      # the msgid, possibly continued on following lines
      while ($self->peek =~ /^\s*(?:msgid\s+)?\"(.*)\"\s*$/) {
         $self->{line} =~ /^\s*(?:msgid\s+)?\"(.*)\"\s*$/; #d# DEVEL9021, redo regex on var
         $id .= PApp::I18n::unquote $1;
         $self->line;
      }

      if ($self->peek =~ /^\s*msgstr/) {
         # the msgstr, possibly continued on following lines
         while ($self->peek =~ /^\s*(?:msgstr\s+)?\"(.*)\"\s*$/) {
            $self->{line} =~ /^\s*(?:msgstr\s+)?\"(.*)\"\s*$/; #d# DEVEL9021, redo regex on var
            $str .= PApp::I18n::unquote $1;
            $self->line;
         }
      } elsif ($self->peek =~ /\S/) {
         $self->perr("expected msgstr, not ");
      } else {
         return;
      }
   } elsif ($self->peek =~ /\S/) {
      $self->perr("expected msgid");
   } else {
      return;
   }

   ($id, $str, @c);
}

package PApp::I18n::PO_Writer;

use Carp;
use IO::Handle; # for ->print on perls that do not load it on demand

=back

CLASS PApp::I18n::PO_Writer

This class can be used to write a new .po file. (where "po file" is about
the same thing as a standard "Portable Object" file from the NLS standard
developed by Uniforum).

=over 4

=item $po = new PApp::I18n::PO_Writer $pathname

Opens the given file for writing. Croaks if the file cannot be opened.

=cut

sub new {
   my ($class, $path) = @_;
   my $self = bless { path => $path }, $class;

   open $self->{fh}, ">:utf8", $path
      or croak "unable to open '$path' for writing: $!";

   $self;
}

=item $po->add ($msgid, $msgstr, @comments);

Write another entry to the po file. See PO_Reader's C<next> method.

=cut

# Quote a string for the po file. Strings with an embedded newline escape
# followed by at least two more characters are split after each such
# escape and get an empty string ("") as their first line.
sub splitstr($) {
   local $_ = "\"" . (PApp::I18n::quote shift) . "\"\n";
   if (s/\\n(..)/\\n"\n"$1/g) {
      $_ = "\"\"\n" . $_;
   }
   $_;
}

sub add {
   my $self = shift;
   my ($id, $str, @c) = @_;

   $self->{fh}->print(
      (map "#$_\n", @c),
      "msgid " , splitstr $id,
      "msgstr ", splitstr $str,
      "\n"
   );
}

package PApp::I18n;

1;

=back

=head1 AUTHOR

 Marc Lehmann
 http://home.schmorp.de/

=cut

# the following data tables are originally from http://iso.plan9.de/
__DATA__
abw aw Aruba afg af Afghanistan ago ao Angola aia ai Anguilla alb al Albania and ad Andorra ant an Netherlands Antilles are ae Arab Emirates arg ar Argentina arm am Armenia asm as American Samoa atg ag Antigua and Barbuda aus au Australia aut at Austria aze az Azerbaijan bdi bi Burundi bel be Belgium ben bj Benin bfa bf Burkina Faso bgd bd Bangladesh bgr bg Bulgaria bhr bh Bahrain bhs bs Bahamas bih ba Bosnia and Herzegovina blr by Belarus blz bz Belize bmu bm Bermuda bol bo Bolivia bra br Brazil brb bb Barbados brn bn Brunei btn bt Bhutan bwa bw Botswana caf cf Central African Republic can ca Canada che ch Switzerland chl cl Chile chn cn China civ ci Côte d'Ivoire cmr cm Cameroon cod cd Congo cog cg Congo cok ck Cook Islands col co Colombia com km Comoros cpv cv Cape Verde cri cr Costa Rica cub cu Cuba cym ky Cayman Islands cyp cy Cyprus cze cz Czech Republic deu de Germany dji dj Djibouti dma dm Dominica dnk dk Denmark dom do Dominican Republic dza dz Algeria ecu ec Ecuador egy eg Egypt eri er Eritrea esh eh Western Sahara esp es Spain est ee Estonia eth et Ethiopia fin fi Finland fji fj Fiji flk fk Malvinas fra fr France fro fo Faeroe Islands fsm fm Micronesia gab ga Gabon gbr gb United Kingdom geo ge Georgia gha gh Ghana gib gi Gibraltar gin gn Guinea glp gp Guadeloupe gmb gm Gambia gnb gw Guinea-Bissau gnq gq Equatorial Guinea grc gr Greece grd gd Grenada grl gl Greenland gtm gt Guatemala guf gf French Guiana gum gu Guam guy gy Guyana hkg hk Hong Kong hnd hn Honduras hrv hr Croatia hti ht Haiti hun hu Hungary idn id Indonesia ind in India irl ie Ireland irn ir Iran irq iq Iraq isl is Iceland isr il Israel ita it Italy jam jm Jamaica jor
jo Jordan jpn jp Japan kaz kz Kazakhstan ken ke Kenya kgz kg Kyrgyzstan khm kh Cambodia kir ki Kiribati kna kn Saint Kitts and Nevis kor kr Republic of Korea kwt kw Kuwait lao la Lao lbn lb Lebanon lbr lr Liberia lby ly Jamahiriya lca lc Saint Lucia lie li Liechtenstein lka lk Sri Lanka lso ls Lesotho ltu lt Lithuania lux lu Luxembourg lva lv Latvia mac mo Macao mar ma Morocco mco mc Monaco mda md Moldova mdg mg Madagascar mdv mv Maldives mex mx Mexico mhl mh Marshall Islands mkd mk Macedonia mli ml Mali mlt mt Malta mmr mm Myanmar mng mn Mongolia mnp mp Mariana Islands moz mz Mozambique mrt mr Mauritania msr ms Montserrat mtq mq Martinique mus mu Mauritius mwi mw Malawi mys my Malaysia nam na Namibia ncl nc New Caledonia ner ne Niger nfk nf Norfolk Island nga ng Nigeria nic ni Nicaragua niu nu Niue nld nl Netherlands nor no Norway npl np Nepal nru nr Nauru nzl nz New Zealand omn om Oman pak pk Pakistan pan pa Panama pcn pn Pitcairn per pe Peru phl ph Philippines plw pw Palau png pg Papua New Guinea pol pl Poland pri pr Puerto Rico prk kp Korea prt pt Portugal pry py Paraguay pse ps Palestine pyf pf French Polynesia qat qa Qatar reu re Réunion rom ro Romania rus ru Russia rwa rw Rwanda sau sa Saudi Arabia sdn sd Sudan sen sn Senegal sgp sg Singapore shn sh Saint Helena sjm sj Svalbard and Jan Mayen Islands slb sb Solomon Islands sle sl Sierra Leone slv sv El Salvador smr sm San Marino som so Somalia spm pm Saint Pierre and Miquelon stp st São Tome and Principe sur sr Suriname svk sk Slovakia svn si Slovenia swe se Sweden swz sz Swaziland syc sc Seychelles syr sy Syria tca tc Turks and Caicos Islands tcd td Chad tgo tg Togo tha th Thailand tjk tj Tajikistan tkl tk Tokelau tkm tm Turkmenistan tmp tp East Timor ton to Tonga tto tt Trinidad and Tobago tun tn Tunisia tur tr Turkey tuv tv Tuvalu twn tw Taiwan tza tz Tanzania uga ug Uganda ukr ua Ukraine ury uy Uruguay usa us United States uzb uz Uzbekistan vat va Holy See vct vc Saint Vincent and the Grenadines ven ve 
Venezuela vgb vg British Virgin Islands vir vi Virgin Islands vnm vn Viet Nam vut vu Vanuatu wlf wf Wallis and Futuna Islands wsm ws Samoa yem ye Yemen scg cs Serbia and Montenegro zaf za South Africa zmb zm Zambia zwe zw Zimbabwe __SPLIT__ aar aa Afar abk ab Abkhazian ace Achinese ach Acoli ada Adangme afa Afro-Asiatic (Other) afh Afrihili afr af Afrikaans aka Akan akk Akkadian ale Aleut alg Algonquian languages amh am Amharic ang English, Old (ca. 450-1100) apa Apache languages ara ar Arabic arc Aramaic arn Araucanian arp Arapaho art Artificial (Other) arw Arawak asm as Assamese ath Athapascan languages aus Australian languages ava Avaric ave ae Avestan awa Awadhi aym ay Aymara aze az Azerbaijani bad Banda bai Bamileke languages bak ba Bashkir bal Baluchi bam Bambara ban Balinese bas Basa bat Baltic (Other) bej Beja bel be Belarusian bem Bemba ben bn Bengali ber Berber (Other) bho Bhojpuri bih bh Bihari bik Bikol bin Bini bis bi Bislama bla Siksika bnt Bantu (Other) bod bo Tibetan bos bs Bosnian bra Braj bre br Breton btk Batak (Indonesia) bua Buriat bug Buginese bul bg Bulgarian cad Caddo cai Central American Indian (Other) car Carib cat ca Catalan cau Caucasian (Other) ceb Cebuano cel Celtic (Other) ces cs Czech cha ch Chamorro chb Chibcha che ce Chechen chg Chagatai chk Chuukese chm Mari chn Chinook jargon cho Choctaw chp Chipewyan chr Cherokee chu cu Church Slavic chv cv Chuvash chy Cheyenne cmc Chamic languages cop Coptic cor kw Cornish cos co Corsican cpe Creoles and pidgins, English based (Other) cpf Creoles and pidgins, French-based (Other) cpp Creoles and pidgins, Portuguese-based (Other) cre Cree crp Creoles and pidgins (Other) cus Cushitic (Other) cym cy Welsh dak Dakota dan da Danish day Dayak del Delaware den Slave (Athapascan) deu de German dgr Dogrib din Dinka div Divehi doi Dogri dra Dravidian (Other) dua Duala dum Dutch, Middle (ca. 
1050-1350) dyu Dyula dzo dz Dzongkha efi Efik egy Egyptian (Ancient) eka Ekajuk ell el Greek, Modern (1453-) elx Elamite eng en English enm English, Middle (1100-1500) epo eo Esperanto est et Estonian eus eu Basque ewe Ewe ewo Ewondo fan Fang fao fo Faroese fas fa Persian fat Fanti fij fj Fijian fin fi Finnish fiu Finno-Ugrian (Other) fon Fon fra fr French frm French, Middle (ca. 1400-1600) fro French, Old (842-ca. 1400) fry fy Frisian ful Fulah fur Friulian gaa Ga gay Gayo gba Gbaya gem Germanic (Other) gez Geez gil Gilbertese gla gd Gaelic (Scots) gle ga Irish glg gl Gallegan glv gv Manx gmh German, Middle High (ca. 1050-1500) goh German, Old High (ca. 750-1050) gon Gondi gor Gorontalo got Gothic grb Grebo grc Greek, Ancient (to 1453) grn gn Guarani guj gu Gujarati gwi Gwich´in hai Haida hau ha Hausa haw Hawaiian heb he Hebrew her hz Herero hil Hiligaynon him Himachali hin hi Hindi hit Hittite hmn Hmong hmo ho Hiri Motu hrv hr Croatian hun hu Hungarian hup Hupa hye hy Armenian iba Iban ibo Igbo ijo Ijo iku iu Inuktitut ile ie Interlingue ilo Iloko ina ia Interlingua (International Auxiliary Language Association) inc Indic (Other) ind id Indonesian ine Indo-European (Other) ipk ik Inupiaq ira Iranian (Other) iro Iroquoian languages isl is Icelandic ita it Italian jaw jw Javanese jpn ja Japanese jpr Judeo-Persian kaa Kara-Kalpak kab Kabyle kac Kachin kal kl Kalaallisut kam Kamba kan kn Kannada kar Karen kas ks Kashmiri kat ka Georgian kau Kanuri kaw Kawi kaz kk Kazakh kha Khasi khi Khoisan (Other) khm km Khmer kho Khotanese kik ki Kikuyu kin rw Kinyarwanda kir ky Kirghiz kmb Kimbundu kok Konkani kom kv Komi kon Kongo kor ko Korean kos Kosraean kpe Kpelle kro Kru kru Kurukh kum Kumyk kur ku Kurdish kut Kutenai lad Ladino lah Lahnda lam Lamba lao lo Lao lat la Latin lav lv Latvian lez Lezghian lin ln Lingala lit lt Lithuanian lol Mongo loz Lozi ltz lb Letzeburgesch lua Luba-Lulua lub Luba-Katanga lug Ganda lui Luiseno lun Lunda luo Luo (Kenya and Tanzania) lus lushai 
mad Madurese mag Magahi mah mh Marshall mai Maithili mak Makasar mal ml Malayalam man Mandingo map Austronesian (Other) mar mr Marathi mas Masai mdr Mandar men Mende mga Irish, Middle (900-1200) mic Micmac min Minangkabau mis Miscellaneous languages mkd mk Macedonian mkh Mon-Khmer (Other) mlg mg Malagasy mlt mt Maltese mnc Manchu mni Manipuri mno Manobo languages moh Mohawk mol mo Moldavian mon mn Mongolian mos Mossi mri mi Maori msa ms Malay mul Multiple languages mun Munda languages mus Creek mwr Marwari mya my Burmese myn Mayan languages nah Nahuatl nai North American Indian nau na Nauru nav nv Navajo nbl nr Ndebele, South nde nd Ndebele, North ndo ng Ndonga nds Low German; Low Saxon; German, Low; Saxon, Low nep ne Nepali new Newari nia Nias nic Niger-Kordofanian (Other) niu Niuean nld nl Dutch nno nn Norwegian Nynorsk nob nb Norwegian Bokmål non Norse, Old nor no Norwegian nso Sotho, Northern nub Nubian languages nya ny Chichewa; Nyanja nym Nyamwezi nyn Nyankole nyo Nyoro nzi Nzima oci oc Occitan (post 1500); Provençal oji Ojibwa ori or Oriya orm om Oromo osa Osage oss os Ossetian; Ossetic ota Turkish, Ottoman (1500-1928) oto Otomian languages paa Papuan (Other) pag Pangasinan pal Pahlavi pam Pampanga pan pa Panjabi pap Papiamento pau Palauan peo Persian, Old (ca. 600-400 b.c.) 
phi Philippine (Other) pli pi Pali pol pl Polish pon Pohnpeian por pt Portuguese pra Prakrit languages pro Provençal, Old (to 1500) pus ps Pushto que qu Quechua raj Rajasthani rap Rapanui rar Rarotongan roa Romance (Other) rom Romany ron ro Romanian run rn Rundi rus ru Russian sad Sandawe sag sg Sango sah Yakut sai South American Indian (Other) sal Salishan languages sam Samaritan Aramaic san sa Sanskrit sas Sasak sat Santali sco Scots sel Selkup sem Semitic (Other) sga Irish, Old (to 900) sgn Sign Languages shn Shan sid Sidamo sin si Sinhalese sio Siouan languages sit Sino-Tibetan (Other) sla Slavic (Other) slk sk Slovak slv sl Slovenian sme se Northern Sami smi Sami languages (Other) smo sm Samoan sna sn Shona snd sd Sindhi snk Soninke sog Sogdian som so Somali son Songhai sot st Sotho, Southern spa es Spanish sqi sq Albanian srd sc Sardinian srp sr Serbian srr Serer ssa Nilo-Saharan (Other) ssw ss Swati suk Sukuma sun su Sundanese sus Susu sux Sumerian swa sw Swahili swe sv Swedish syr Syriac tah ty Tahitian tai Tai (Other) tam ta Tamil tat tt Tatar tel te Telugu tem Timne ter Tereno tet Tetum tgk tg Tajik tgl tl Tagalog tha th Thai tig Tigre tir ti Tigrinya tiv Tiv tkl Tokelau tli Tlingit tmh Tamashek tog Tonga (Nyasa) ton to Tonga (Tonga Islands) tpi Tok Pisin tsi Tsimshian tsn tn Tswana tso ts Tsonga tuk tk Turkmen tum Tumbuka tur tr Turkish tut Altaic (Other) tvl Tuvalu twi tw Twi tyv Tuvinian uga Ugaritic uig ug Uighur ukr uk Ukrainian umb Umbundu und Undetermined urd ur Urdu uzb uz Uzbek vai Vai ven Venda vie vi Vietnamese vol vo Volapük vot Votic wak Wakashan languages wal Walamo war Waray was Washo wen Sorbian languages wol wo Wolof xho xh Xhosa yao Yao yap Yapese yid yi Yiddish yor yo Yoruba ypk Yupik languages zap Zapotec zen Zenaga zha za Zhuang zho zh Chinese znd Zande zul zu Zulu zun Zuni __SPLIT__ afr iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 ara iso-8859-6,iso-ir-127,cp1256 bel iso-8859-5,cp1251,iso-ir-111,iso-ir-144,cp866 bre 
iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100,iso-8859-14 bul iso-8859-5,cp1251,iso-ir-111,iso-ir-144,cp866 cat iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 ces iso-8859-2,cp1250,iso-ir-101 cor iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100,iso-8859-14 cym iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100,iso-8859-14 dan cp1252,iso-8859-9,iso-8859-1,iso-8859-15,cp819,iso-ir-100 dan_dnk iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100,iso-646-dk deu iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 ell iso-8859-7,cp1253,iso-ir-126 eng iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 eng_usa us-ascii,iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100,cp367 epo iso-8859-3,iso-ir-109 est iso-8859-4,iso-8859-10,cp1257,iso-ir-110,iso-8859-15 eus iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 fao iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 fas iso-8859-6,iso-ir-127 fin iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 fra iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 gla iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100,iso-8859-14 gle iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100,iso-8859-14 glg iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 glv iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100,iso-8859-14 heb iso-8859-8,cp1255 hrv iso-8859-2,cp1250,iso-ir-101 hun iso-8859-2,cp1250,iso-ir-101,iso-ir-87 hye armscii-8 iku nunacom-8 ind iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 ipk iso-8859-10 isl iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 ita iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 jpn euc-jp,sjis,iso-2022-jp,jis7 kal iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 kor euc-kr,uhc,johab,iso-2022-kr,iso-646-kr,ksc5636 lao mulelao-1,ibm-cp1133 lav iso-8859-4,iso-8859-10,cp1257,iso-ir-110,iso-8859-13 lit iso-8859-4,iso-8859-10,cp1257,iso-ir-110,iso-8859-13 mkd iso-8859-5,cp1251,iso-ir-111,iso-ir-144,cp866 mkd_mkd iso-ir-147,iso-8859-5,cp1251,iso-ir-111,iso-ir-144,cp866 mlt iso-8859-3,iso-ir-109 nld iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 nno 
iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 nob iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 nor iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 oci iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 pol iso-8859-2,cp1250,iso-ir-101 por iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 ron iso-8859-2,cp1250,iso-ir-101 rus koi8-r,iso-8859-5,cp1251,iso-ir-111,iso-ir-144,cp866,koi8-u rus_ukr koi8-u,iso-8859-5,cp1251,iso-ir-111,iso-ir-144,cp866 slk iso-8859-2,cp1250,iso-ir-101 slv iso-8859-2,cp1250,iso-ir-101 sme iso-8859-10 spa iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 sqi iso-8859-2,cp1250,iso-ir-101,iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100,iso-8859-9 srp iso-8859-2,cp1250,iso-ir-101,iso-8859-5,cp1251,iso-ir-111,iso-ir-144,cp866,iso-ir-146 swa iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 swe iso-8859-1,iso-8859-15,cp1252,cp819,iso-ir-100 tha cp874,tis-620,iso-8859-11 tur iso-8859-9,iso-8859-3,iso-ir-109,cp1254 ukr iso-8859-5,cp1251,iso-ir-111,iso-ir-144,cp866,koi8-u vie cp1258,viscii,tcvn5712,vps zho euc-tw,big5 zho_chn euc-cn,gbk,iso-2022-cn,iso-ir-58