# Copyright 2007, 2008, 2009, 2010, 2011 Kevin Ryde # This file is part of Chart. # # Chart is free software; you can redistribute it and/or modify it under the # terms of the GNU General Public License as published by the Free Software # Foundation; either version 3, or (at your option) any later version. # # Chart is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more # details. # # You should have received a copy of the GNU General Public License along # with Chart. If not, see . package App::Chart::Texinfo::Util; use 5.004; use strict; use warnings; use Exporter; use vars qw(@ISA @EXPORT_OK %EXPORT_TAGS $VERSION); @ISA = qw(Exporter); @EXPORT_OK = qw(node_to_html_anchor); %EXPORT_TAGS = (all => \@EXPORT_OK); # Nodes "HTML Xref Node Name Expansion" # and "HTML Xref 8-bit Character Expansion" in the Texinfo manual # Given $node is wide-char. # sub node_to_html_anchor { my ($node) = @_; # precomposed characters where possible if ($node =~ /[^[:ascii:]]/) { require Unicode::Normalize; $node = Unicode::Normalize::NFC ($node); } # rule 3 multiple space,tab,newline become one space $node =~ s/[ \t\n]+/ /g; # rule 4 lose leading and trailing space $node =~ s/^ +//; $node =~ s/ +$//; # rule 6 chars except ascii alnum and the "-" (just inserted) become hex $node =~ s/([^ A-Za-z0-9])/_escape_char($1)/ge; # rule 5 remaining spaces become dashes $node =~ tr/ /-/; # rule 7 prepend "g_t" if doesn't begin with alpha if ($node =~ /^[^A-Za-z]/) { $node = 'g_t' . $node; } return $node; } # ENHANCE-ME: For EBCDIC presumably a UTF-EBCDIC -> unicode conversion is # needed here, instead of just ord(). sub _escape_char { my ($c) = @_; # single-char string $c = ord($c); if ($c <= 0xFFFF) { return sprintf ('_%04x', $c); } elsif ($c <= 0xFF_FFFF) { return sprintf ('__%06x', $c); } } 1; __END__ =for stopwords texinfo Texinfo utf unicode =head1 NAME App::Chart::Texinfo::Util -- some texinfo utilities =for test_synopsis my ($anchor, $node) =head1 SYNOPSIS use App::Chart::Texinfo::Util; $anchor = App::Chart::Texinfo::Util::node_to_html_anchor ($node); # or imported use App::Chart::Texinfo::Util ':all'; $anchor = node_to_html_anchor ($node); =head1 DESCRIPTION A function which hasn't found a better place to live yet. =head1 FUNCTIONS =over 4 =item C<$string = App::Chart::Texinfo::Util::node_to_html_anchor ($node)> Return a HTML anchor for a Texinfo node name, as per anchor generation specified in the Texinfo manual "HTML Xref Node Name Expansion" and "HTML Xref 8-bit Character Expansion". It encodes various spaces and non-alphanumeric characters as hexadecimal "_HHHH" sequences. For example, App::Chart::Texinfo::Util::node_to_html_anchor ('My Node-Name') # returns 'My-Node_002dName' Perl utf8 wide-char strings can be passed here. Characters beyond 255 are taken to be unicode and encoded as 4 or 6 hex digits per the Texinfo spec. =back =head1 SEE ALSO L =cut