package Locale::Maketext::Fuzzy;
$Locale::Maketext::Fuzzy::VERSION = '0.10';

use 5.005;
use strict;
use Locale::Maketext;
use base 'Locale::Maketext';

# override_maketext([FLAG])
#
# Class or instance method.  Works on the symbol table of the invocant's
# own class only (an object is reduced to its class name first).
#
#   FLAG true          -- install maketext_fuzzy() as that class's maketext()
#   FLAG given, false  -- remove that class's own maketext() entry again, so
#                         method lookup falls back to the inherited one
#   FLAG omitted       -- change nothing
#
# Returns 1 if the class currently has its own maketext() defined, else 0.
sub override_maketext {
    my ( $class, $flag ) = @_;
    $class = ref($class) if ref($class);

    # Symbolic references are needed to reach the stash of "$class".
    no strict 'refs';

    if ($flag) {
        *{"$class\::maketext"} = \&maketext_fuzzy;
    }
    elsif ( @_ >= 2 ) {
        # An explicit false flag was passed (as opposed to no flag at all).
        delete ${"$class\::"}{maketext};
    }

    return ( defined &{"$class\::maketext"} ? 1 : 0 );
}

# Global cache of entries and their regexified forms.
# Each value is an arrayref: [ compiled regex, arrayref of parameter ords ].
my %regex_cache;

# maketext_fuzzy(PHRASE [, PARAMS...])
#
# If PHRASE is a key of any lexicon of the handle, behaves exactly like the
# parent class's maketext().  Otherwise every bracketed lexicon key is turned
# into a regex and matched against PHRASE; the longest matching key is then
# passed to the parent maketext(), with the text captured from PHRASE placed
# in front of PARAMS.  Returns PHRASE unchanged when nothing matches.
sub maketext_fuzzy {
    my ( $handle, $phrase ) = splice( @_, 0, 2 );

    # An array of all lexicon hashrefs
    my @lexicons = @{ $handle->_lex_refs };

    # Try exact match if possible at all.
    foreach my $lex (@lexicons) {
        return $handle->SUPER::maketext( $phrase, @_ )
          if exists $lex->{$phrase};
    }

    # Keys are matched entries; values are arrayrefs of extracted params
    my %candidate;

    # Fuzzy match phase 1 -- extract all candidates
    foreach my $lex (@lexicons) {

        # We're not interested in non-bracketed entries, so ignore them.
        # A "[" counts only when preceded by an even number of "~" escapes.
        # NOTE(review): this filter and the two statements that follow it
        # were restored after a lossy extraction of the source; confirm
        # against the upstream 0.10 release.
        foreach my $entry ( grep /(?:(?<!~)(?:~~)*)\[/, keys %{$lex} ) {
            my $re = ( $regex_cache{$entry} ||= [ _regexify($entry) ] );

            # List assignment in scalar context yields the capture count,
            # so a failed match (empty list) skips to the next entry.
            my @vars = ( $phrase =~ $re->[0] ) or next;

            # When the entry names explicit positions (_1, _2, ...),
            # reorder the captures accordingly; otherwise keep match order.
            $candidate{$entry} ||=
              ( @{ $re->[1] } ? [ @vars[ @{ $re->[1] } ] ] : \@vars );
        }
    }

    # Fail early if we cannot find anything that matches
    return $phrase unless %candidate;

    # Fuzzy match phase 2 -- select the best candidate
    $phrase = (
        sort {
            # For now, we just use a very crude heuristic: "Longer is better"
            length($b) <=> length($a)
              or $b cmp $a
        } keys %candidate
    )[0];

    return $handle->SUPER::maketext( $phrase, @{ $candidate{$phrase} }, @_ );
}

# _regexify(ENTRY)
#
# Turns a lexicon key written in bracket notation into a regex matching
# already-interpolated text.  Returns a two-element list:
#   ( anchored compiled regex, arrayref of parameter ords )
# The text is quotemeta()ed first, so the pattern below looks for the
# backslash-escaped forms of "[", "]" and "~".
# NOTE(review): everything from the look-behind below to the sort comparator
# in _paramify() was restored after a lossy extraction of the source; confirm
# against the upstream 0.10 release.
sub _regexify {
    my $text = quotemeta(shift);
    my @ords;

    $text =~ s{
        (                           # capture into $1...
            (?<!\\~)(?:\\~\\~)*     #   an even number of ~ characters
        )                           #   (to be restored back)
        \\\[                        # opening bracket

        (                           # capture into $2...
            (?:                     #   any number of these:
                [^~\]]              #     normal non-] characters
                    |               #   or
                ~\\?.               #     escaped characters
            )*
        )
        \\\]                        # closing bracket
    }{
        $1 . _paramify( $2, \@ords )
    }egx;

    return ( qr/^$text$/, \@ords );
}

# _paramify(BRACKET_BODY, ORDREF)
#
# Builds the regex fragment for the (already quotemeta()ed) inside of one
# bracket group.  Every group yields one "(.*?)" capture.  For a function
# call such as "quant,_1,file,files" the literal arguments are appended as
# an alternation so they are not swallowed by the capture.  The zero-based
# position of each "_N" argument is pushed onto @$ORDREF.
sub _paramify {
    my ( $text, $ordref ) = @_;
    my $out     = '(.*?)';
    my @choices = split( /\\,/, $text );

    if ( $choices[0] =~ /^(?:\w+|\\#|\\\*)$/ ) {

        # Do away with the function name (a bare _N or _* is not one).
        shift @choices unless $choices[0] =~ /^_(?:\d+|\\\*)$/;

        # Build an alternation regex to weed out vars.
        $out .= '(?:' . join(
            '|',
            sort {
                length($b) <=> length($a)    # longest first
            } map {
                /^_(?:(\d+)|\\\*)$/
                  ? do {
                    push @{$ordref}, ( $1 - 1 ) if defined $1;
                    '';
                  }
                  : $_                       # turn _1, _2, _*... into ''
            } @choices
        ) . ')';

        # Drop the alternation again if it ended up empty.
        $out =~ s/\Q(?:)\E$//;
    }

    return $out;
}

1;

=head1 NAME

Locale::Maketext::Fuzzy - Maketext from already interpolated strings

=head1 VERSION

This document describes version 0.10 of Locale::Maketext::Fuzzy,
released October 14, 2007.

=head1 SYNOPSIS

    package MyApp::L10N;
    use base 'Locale::Maketext::Fuzzy'; # instead of Locale::Maketext

    package MyApp::L10N::de;
    use base 'MyApp::L10N';
    our %Lexicon = (
        # Exact match should always be preferred if possible
        "0 camels were released."
            => "Exact match",

        # Fuzzy match candidate
        "[quant,_1,camel was,camels were] released."
            => "[quant,_1,Kamel wurde,Kamele wurden] freigegeben.",

        # This could also match fuzzily, but is less preferred
        "[_2] released[_1]"
            => "[_1][_2] ist frei[_1]",
    );

    package main;
    my $lh = MyApp::L10N->get_handle('de');

    # All ->maketext calls below will become ->maketext_fuzzy instead
    $lh->override_maketext(1);

    # This prints "Exact match"
    print $lh->maketext('0 camels were released.');

    # "1 Kamel wurde freigegeben." -- quant() gets 1
    print $lh->maketext('1 camel was released.');

    # "2 Kamele wurden freigegeben." -- quant() gets 2
    print $lh->maketext('2 camels were released.');

    # "3 Kamele wurden freigegeben." -- parameters are ignored
    print $lh->maketext('3 released.');

    # "4 Kamele wurden freigegeben." -- normal usage
    print $lh->maketext('[*,_1,camel was,camels were] released.', 4);

    # "!Perl ist frei!" -- matches the broader one
    # Note that the sequence ([_2] before [_1]) is preserved
    print $lh->maketext('Perl released!');

=head1 DESCRIPTION

This module is a subclass of C<Locale::Maketext>, with additional
support for localizing messages that already contain interpolated
variables.

This is most useful when the messages are returned by external sources
-- for example, to match C<dir: command not found> against
C<[_1]: command not found>.

Of course, this module is also useful if you're simply too lazy
to use the

    $lh->maketext("[quant,_1,file,files] deleted.", $count);

syntax, but wish to write

    $lh->maketext_fuzzy("$count files deleted");

instead, and have the correct plural form figured out automatically.

If C<maketext_fuzzy> seems too long to type for you, this module
also provides a C<override_maketext> method to turn I<all> C<maketext>
calls into C<maketext_fuzzy> calls.

=head1 METHODS

=head2 $lh->maketext_fuzzy(I<key>[, I<parameters...>]);

That method takes exactly the same arguments as the C<maketext> method
of C<Locale::Maketext>.

If I<key> is found in lexicons, it is applied in the same way as
C<maketext>.  Otherwise, it looks at all lexicon entries that could
possibly yield I<key>, by turning C<[...]> sequences into C<(.*?)> and
matching the resulting regular expression against I<key>.

Once it finds all candidate entries, the longest one replaces the
I<key> for the real C<maketext> call.  Variables matched by its bracket
sequences (C<$1>, C<$2>...) are placed before I<parameters>; the order
of variables in the matched entry is correctly preserved.

For example, if the matched entry in C<%Lexicon> is C<Test [_1]>,
this call:

    $lh->maketext_fuzzy("Test string", "param");

is equivalent to this:

    $lh->maketext("Test [_1]", "string", "param");

However, most of the time you won't need to supply I<parameters> to
a C<maketext_fuzzy> call, since all parameters are already interpolated
into the string.

=head2 $lh->override_maketext([I<flag>]);

If I<flag> is true, this accessor method turns C<$lh-E<gt>maketext>
into an alias for C<$lh-E<gt>maketext_fuzzy>, so all consecutive
C<maketext> calls in the C<$lh>'s packages are automatically fuzzy.
A false I<flag> restores the original behaviour.  If the flag is not
specified, returns the current status of override; the default is
0 (no overriding).

Note that this call only modifies the symbol table of the I<language
class> that C<$lh> belongs to, so other languages are not affected.
If you want to override all language handles in a certain application,
try this:

    MyApp::L10N->override_maketext(1);

=head1 CAVEATS

=over 4

=item *

The "longer is better" heuristic to determine the best match is
reasonably good, but could certainly be improved.

=item *

Currently, C<"[quant,_1,file] deleted"> won't match C<"3 files deleted">;
you'll have to write C<"[quant,_1,file,files] deleted"> instead, or
simply use C<"[_1] file deleted"> as the lexicon key and put the correct
plural form handling into the corresponding value.

=item *

When used in combination with C<Locale::Maketext::Lexicon>'s C<Tie>
backend, all keys would be iterated over each time a fuzzy match is
performed, and may cause serious speed penalty.  Patches welcome.

=back

=head1 SEE ALSO

L<Locale::Maketext>, L<Locale::Maketext::Lexicon>

=head1 HISTORY

This particular module was written to facilitate an I layer for Slashcode's I