package I18NFool::Extractor; use MKDoc::XML::TreeBuilder; use Locale::PO; use warnings; use strict; our $Namespace = "http://xml.zope.org/namespaces/i18n"; our $Prefix = 'i18n'; our $Domain = 'default'; our $Results = {}; sub process { my $class = shift; my $data = shift; local $Namespace = $Namespace; local $Prefix = $Prefix; local $Domain = $Domain; local $Results = {}; my @nodes = MKDoc::XML::TreeBuilder->process_data ($data); for (@nodes) { $class->_process ($_) } return $Results; } sub _process { my $class = shift; my $tree = shift; return unless (ref $tree); local $Prefix = $Prefix; local $Domain = $Domain; # process the I18N namespace foreach my $key (keys %{$tree}) { my $value = $tree->{$key}; if ($value eq $Namespace) { next unless ($key =~ /^xmlns\:/); delete $tree->{$key}; $Prefix = $key; $Prefix =~ s/^xmlns\://; } } # set the current i18n:domain $Domain = delete $tree->{"$Prefix:domain"} || $Domain; my $tag = $tree->{_tag}; my $attr = { map { /^_/ ? () : ( $_ => $tree->{$_} ) } keys %{$tree} }; return if ($tag eq '~comment' or $tag eq '~pi' or $tag eq '~declaration'); # lookup for attributes... $tree->{"$Prefix:attributes"} && do { my $attributes = $tree->{"$Prefix:attributes"}; $attributes =~ s/\s*;\s*$//; $attributes =~ s/^\s*//; my @attributes = split /\s*\;\s*/, $attributes; foreach my $attribute (@attributes) { # if we have i18n:attributes="alt alt_text", then the # attribute name is 'alt' and the # translate_id is 'alt_text' my ($attribute_name, $translate_id); if ($attribute =~ /\s/) { ($attribute_name, $translate_id) = split /\s+/, $attribute, 2; } # otherwise, if we have i18n:attributes="alt", then the # attribute name is 'alt' and the # translate_id is $tree->{'alt'} else { $attribute_name = $attribute; $translate_id = _canonicalize ( $tree->{$attribute_name} ); } $translate_id || next; $Results->{$Domain} ||= {}; my $existing_po = $Results->{$Domain}->{$translate_id}; my $new_po = Locale::PO->new ( -msgid => $translate_id, -msgstr => _canonicalize ( $tree->{$attribute_name} ) || '', ); if ($existing_po && ($existing_po->{msgstr} ne $new_po->{msgstr})) { print STDERR "String for '$translate_id' doesn't match:\n". " old: $existing_po->{msgstr}\n". " new: $new_po->{msgstr}\n" } $Results->{$Domain}->{$translate_id} = $new_po; } }; # lookup for content... exists $tree->{"$Prefix:translate"} && do { my ($translate_id); # if we have $Domain:translate="something", # then the translate_id is 'something' if (defined $tree->{"$Prefix:translate"} and $tree->{"$Prefix:translate"} ne '') { $translate_id = $tree->{"$Prefix:translate"}; } # otherwise, the translate_id has to be computed # from the contents of this node, so that #
Hello, David, how are you?
# becomes 'Hello, ${user}, how are you?' else { $translate_id = _canonicalize ( _extract_content_string ($tree) ); } $translate_id || next; $Results->{$Domain} ||= {}; my $existing_po = $Results->{$Domain}->{$translate_id}; my $new_po = Locale::PO->new ( -msgid => $translate_id, -msgstr => _canonicalize ( _extract_content_string ($tree) ) || '', ); if ($existing_po && ($existing_po->{msgstr} ne $new_po->{msgstr})) { print STDERR "String for '$translate_id' doesn't match:\n". " old: $existing_po->{msgstr}\n". " new: $new_po->{msgstr}\n" } $Results->{$Domain}->{$translate_id} = $new_po; }; # I know, I know, the I18N namespace processing is a bit broken... # It should suffice for now delete $tree->{"$Prefix:attributes"}; delete $tree->{"$Prefix:translate"}; delete $tree->{"$Prefix:name"}; # Do the same i18n thing with child nodes, recursively. # for some reason it always makes me think of roller coasters. # Yeeeeeeee! defined $tree->{_content} and do { for (@{$tree->{_content}}) { $class->_process ($_) } }; } sub _canonicalize { my $string = shift || ''; $string =~ s/\r/ /g; $string =~ s/\n/ /g; $string =~ s/\s+/ /gsm; $string =~ s/^ //; $string =~ s/ $//; return $string; } sub _extract_content_string { my $tree = shift; my @res = (); my $count = 0; foreach my $node (@{$tree->{_content}}) { ref $node or do { push @res, $node; next; }; $count++; my $name = $node->{"$Prefix:name"} || $count; push @res, '${' . $name . '}'; } return join '', @res; } 1;