Index: lib/BBCode/Util.pm
===================================================================
--- lib/BBCode/Util.pm (revision 92)
+++ lib/BBCode/Util.pm (working copy)
@@ -7,8 +7,10 @@
use URI ();
use strict;
use warnings;
+use utf8;
+use v5.8.0;
-our $VERSION = '0.20';
+our $VERSION = '0.30';
our @EXPORT;
our @EXPORT_OK;
our %EXPORT_TAGS;
@@ -183,22 +185,40 @@
return $_ ? 1 : 0;
}
+BEGIN { _export qw(parseInt parse) }
+sub parseInt($) {
+ my $num = shift;
+ return undef if not defined $num;
+ $num =~ s/[\s,_]+//g;
+ $num =~ s/^\+//;
+ return 0 if $num =~ /^-?$/;
+ return 0+$1 if $num =~ /^ ( -? \d+ ) $/x;
+ return undef;
+}
+
BEGIN { _export qw(parseNum parse) }
sub parseNum($);
sub parseNum($) {
- local $_ = $_[0];
- return undef if not defined $_;
- s/^\s+|\s+$//g;
- s/(?<=\d),(?=\d)//g;
- s/(?<=\d)_+(?=\d)//g;
- return 0 if /^ \. $/x;
- return 0+$1 if /^ ( [+-]? \d+ ) \.? $/x;
- return 0+$1 if /^ ( [+-]? \d* \. \d+ ) $/x;
- if(/^ ( [+-]? [\d.]* ) e ( [+-]? [\d.]* ) $/xi) {
- my($m,$e) = map parseNum($_), $1, $2;
+ my $num = shift;
+ return undef if not defined $num;
+ $num =~ s/[\s,_]+//g;
+ if($num =~ /^ (.*) e (.*) $/ix) {
+ my($m,$e) = ($1,$2);
+ $m = parseNum $m;
+ $e = parseNum $e;
return $m * (10 ** $e) if defined $m and defined $e;
+ return undef;
}
- return 0;
+ if($num =~ /^ ([^.]*) \. ([^.]*) $/x) {
+ my($i,$f) = ($1,$2);
+ $i = parseInt $i;
+ return undef unless defined $i;
+ return undef unless $f =~ /^(\d*)$/;
+ $num = "$i.$f";
+ $num =~ s/\.$//;
+ return 0+$num;
+ }
+ return parseInt($num);
}
BEGIN { _export qw(parseEntity parse) }
@@ -240,12 +260,17 @@
'a' => [ qw(ol lower-latin) ],
'I' => [ qw(ol upper-roman) ],
'i' => [ qw(ol lower-roman) ],
- "\x{3B1}" => [ qw(ol lower-greek) ],
- "\x{5D0}" => [ qw(ol hebrew) ],
- "\x{3042}" => [ qw(ol hiragana) ],
- "\x{3044}" => [ qw(ol hiragana-iroha) ],
- "\x{30A2}" => [ qw(ol katakana) ],
- "\x{30A4}" => [ qw(ol katakana-iroha) ],
+ 'Α' => [ qw(ol upper-greek) ],
+ 'α' => [ qw(ol lower-greek) ],
+ 'א' => [ qw(ol hebrew) ],
+ 'Ⴀ' => [ qw(ol georgian) ],
+ 'ა' => [ qw(ol georgian) ],
+ 'Ա' => [ qw(ol armenian) ],
+ 'ա' => [ qw(ol armenian) ],
+ 'あ' => [ qw(ol hiragana) ],
+ 'い' => [ qw(ol hiragana-iroha) ],
+ 'ア' => [ qw(ol katakana) ],
+ 'イ' => [ qw(ol katakana-iroha) ],
);
sub parseListType($) {
local $_ = $_[0];
@@ -255,8 +280,7 @@
@ret = ('ul', lc $1);
} elsif(/^(
decimal(?:-leading-zero)? |
- (?:upper|lower)-(?:roman|latin|alpha) |
- lower-greek |
+ (?:upper|lower)-(?:roman|latin|alpha|greek) |
hebrew |
georgian |
armenian |
@@ -271,75 +295,134 @@
return @ret;
}
+# Conversion factors from CSS units to points
my %conv = (
- px => 0.75,
-
+ # Integer conversions within English units
pt => 1,
pc => 12,
in => 72,
+ # Floating-point conversions from Metric units
mm => 72/25.4,
cm => 72/2.54,
- ex => 8,
- em => 12,
+ # Somewhat approximate, but the CSS standard is actually rather
+ # picky about how many pixels a 'pixel' is at different resolutions,
+ # so this is actually relatively reliable.
+ px => 0.75,
);
-# See <URL:http://www.w3.org/TR/CSS21/fonts.html#font-size-props>
-# Tweaked slightly to be more logical
+# Emulation of <font size="num">...</font> from HTML 3.2;
+# See <URL:http://www.w3.org/TR/CSS21/fonts.html#font-size-props>.
+# Tweaked slightly to be more logical.
my @compat = qw(xx-small x-small small medium large x-large xx-large 300%);
BEGIN { _export qw(parseFontSize parse) }
-sub parseFontSize($);
-sub parseFontSize($) {
- local $_ = $_[0];
+sub parseFontSize($;$$$);
+sub parseFontSize($;$$$) {
+ local $_ = shift;
return undef unless defined $_;
+ my($base,$lo,$hi) = @_;
+ $base = 12 if not defined $base;
+ $lo = 8 if not defined $lo;
+ $hi = 72 if not defined $hi;
s/\s+/ /g;
s/^\s|\s$//g;
- if(/^ (\d+ (?: \. \d+ )? ) \s? ([a-z]{2}) $/ix) {
- my($n,$unit) = (0+$1,lc $2);
- if(exists $conv{$unit}) {
- my $n2 = $n / $conv{$unit};
- if($n2 < 8) {
- $n = POSIX::floor(0.5 + 8 * $conv{$unit});
- } elsif($n2 > 72) {
- $n = POSIX::floor(0.5 + 72 * $conv{$unit});
- }
- return "$n$unit";
- }
- }
-
+ # CSS 2.1 15.7 <absolute-size>
if(/^( (?:xx?-)? (?:large|small) | medium )$/ix) {
return lc $1;
}
+ # CSS 2.1 15.7 <relative-size>
+ # Note: Since [FONT] is nestable and not readily computable before HTML
+ # rendering, this can allow a malicious user to escape the
+ # admin-defined font size limits
if(/^ ( larger | smaller ) $/ix) {
return lc $1;
}
+ # CSS 2.1 4.3.2 <length>
+ if(/^ ( [\s\d._+-]+ ) ( [a-z]+ ) $/ix) {
+ my($n,$unit) = ($1,lc $2);
+ $n = parseNum $n;
+ if(defined $n and $n > 0) {
+ my $conv;
+ if(exists $conv{$unit}) {
+ $conv = $conv{$unit};
+ } elsif($unit =~ /^em$/i) {
+ $conv = $base;
+ } elsif($unit =~ /^ex$/i) {
+ $conv = $base * 0.5;
+ } else {
+ return undef;
+ }
+ my $n2 = $n * $conv;
+ if(defined $lo and $n2 < $lo) {
+ $n = $lo / $conv;
+ } elsif(defined $hi and $n2 > $hi) {
+ $n = $hi / $conv;
+ }
+ $n = sprintf "%.3f", $n;
+ $n =~ s/0+$//;
+ $n =~ s/\.$//;
+ return "$n$unit";
+ } else {
+ return undef;
+ }
+ }
+
+ # CSS 2.1 4.3.3 <percentage>
+ # Note: The same concerns apply as for <relative-size>
+ if(/^ ( [\s\d._+-]+ ) % $/x) {
+ my $n = parseNum $1;
+ if(defined $n and $n > 0) {
+ $n *= 0.01;
+ my $n2 = $n * $base;
+ if(defined $lo and $n2 < $lo) {
+ $n = $lo / $base;
+ } elsif(defined $hi and $n2 > $hi) {
+ $n = $hi / $base;
+ }
+ $n *= 100;
+ $n = sprintf "%.3f", $n;
+ $n =~ s/0+$//;
+ $n =~ s/\.$//;
+ return "$n%";
+ } else {
+ return undef;
+ }
+ }
+
+ # HTML 3.2 <font size="number">
+ # See <URL:http://www.w3.org/TR/REC-html32#font>
if(/^ (\d+) $/x) {
my $n = 0+$1;
if($n >= 0 and $n < @compat) {
return $compat[$n];
} else {
- return parseFontSize("$n pt");
+ return parseFontSize("$n pt",$base,$lo,$hi);
}
}
+ # HTML 3.2 <font size="+number">
if(/^ \+ (\d+) $/x) {
- # Roughly equivalent to CSS 2.1 "larger"
- return parseFontSize sprintf "%d%%", 100 * (1.25 ** $1);
+ # "+1" is roughly equivalent to CSS 2.1 "larger"
+ my $n = sprintf "%f%%", 100 * (1.25 ** $1);
+ return parseFontSize($n,$base,$lo,$hi);
}
+ # HTML 3.2 <font size="-number">
if(/^ - (\d+) $/x) {
- # Roughly equivalent to CSS 2.1 "smaller"
- return parseFontSize sprintf "%d%%", 100 * (0.85 ** $1);
+ # "-1" is roughly equivalent to CSS 2.1 "smaller"
+ my $n = sprintf "%f%%", 100 * (0.85 ** $1);
+ return parseFontSize($n,$base,$lo,$hi);
}
return undef;
}
+# Official CSS 2.1 colors are passed through as-is
my %cssColor = map { $_ => 1 } qw(
maroon red orange yellow olive
purple fuchsia white lime green
@@ -347,6 +430,12 @@
black silver gray
);
+# Other named colors must map to an official named color or an #RRGGBB color
+my %extraColor = (
+ darkred => 'maroon',
+ darkblue => 'navy',
+);
+
BEGIN { _export qw(parseColor parse) }
sub parseColor($) {
local $_ = $_[0];
@@ -355,6 +444,7 @@
$_ = lc $_;
return $1 if /^(\w+)$/ and exists $cssColor{$1};
+ return $extraColor{$_} if exists $extraColor{$_};
if(s/^#//) {
s/^ ( [0-9a-f]{1,2} ) $/$1$1$1/x;
@@ -589,6 +679,340 @@
return "[$path]";
}
+sub _c($) {
+ require charnames;
+ my $x = shift;
+ return sprintf("%c(U+%04X %s)", $x, $x, charnames::viacode($x));
+}
+
+BEGIN { _export qw(textOrder text) }
+sub textOrder(\@$) {
+ my $ALPHA = shift;
+ my $n = 0+shift;
+ return $n if $n < 1;
+
+ my $str = "";
+ while($n > 0) {
+ my $digit = ($n % @$ALPHA);
+ $n = int($n / @$ALPHA);
+ if($digit == 0) {
+ $digit = @$ALPHA;
+ $n--;
+ }
+ $str .= $ALPHA->[$digit-1];
+ }
+ return reverse $str;
+}
+
+BEGIN { _export qw(textOrderAlpha text) }
+my @ALPHA = ('A'..'Z');
+sub textOrderAlpha($) {
+ return textOrder(@ALPHA,shift);
+}
+
+BEGIN { _export qw(textOrderGreek text) }
+my @GREEK = qw(Α Β Γ Δ Ε Ζ Η Θ Ι Κ Λ Μ Ν Ξ Ο Π Ρ Σ Τ Υ Φ Χ Ψ Ω);
+sub textOrderGreek($) {
+ return textOrder(@GREEK,shift);
+}
+
+BEGIN { _export qw(textOrderHiragana text) }
+my @HIRAGANA = qw(
+ あ い う え お か
+ き く け こ さ し
+ す せ そ た ち つ
+ て と な に ぬ ね
+ の は ひ ふ へ ほ
+ ま み む め も や
+ ゆ よ ら り る れ
+ ろ わ ゐ ゑ を ん
+);
+sub textOrderHiragana($) {
+ return textOrder(@HIRAGANA,shift);
+}
+
+BEGIN { _export qw(textOrderKatakana text) }
+my @KATAKANA = map { chr(ord($_)+0x60) } @HIRAGANA;
+sub textOrderKatakana($) {
+ return textOrder(@KATAKANA,shift);
+}
+
+BEGIN { _export qw(textOrderHiraganaIROHA text) }
+my @HIRAGANA_IROHA = qw(
+ い ろ は に ほ へ
+ と ち り ぬ る を
+ わ か よ た れ そ
+ つ ね な ら む う
+ ゐ の お く や ま
+ け ふ こ え て あ
+ さ き ゆ め み し
+ ゑ ひ も せ す
+);
+sub textOrderHiraganaIROHA($) {
+ return textOrder(@HIRAGANA_IROHA,shift);
+}
+
+BEGIN { _export qw(textOrderKatakanaIROHA text) }
+my @KATAKANA_IROHA = map { chr(ord($_)+0x60) } @HIRAGANA_IROHA;
+sub textOrderKatakanaIROHA($) {
+ return textOrder(@KATAKANA_IROHA,shift);
+}
+
+BEGIN { _export qw(textOrderCJK text) }
+my $CJK_NEG = '負';
+my @CJK_DIG = qw(零 一 二 三 四 五 六 七 八 九 十);
+my @CJK_ORD = qw(. 十 百 千);
+my @CJK_MAG = qw(.
+ 萬 億 兆 京 垓 秭 穰 溝 澗 正 載 極
+ 恆河沙 阿僧祇 那由他 不可思議 無量大數
+);
+sub textOrderCJK($) {
+ my $num = shift;
+ $num =~ s/\..*$//; # Fractions not handled
+ my $neg = ($num =~ s/^\s*-//) ? 1 : 0;
+ $num =~ s/\D+//g;
+
+ my @groups;
+ push @groups, $1 while $num =~ s/(\d{1,4})$//;
+
+ local $_;
+ my @out;
+ push @out, $CJK_NEG if $neg;
+ for(my $mag=$#groups; $mag>=0; $mag--) {
+ my @digits;
+ $num = $groups[$mag];
+ push @digits, $1 while $num =~ s/(\d)$//;
+
+ $_ = '';
+ for(my $ord=$#digits; $ord>=0; $ord--) {
+ my $dig = 0+$digits[$ord];
+ if($dig) {
+ $_ .= $CJK_DIG[$dig];
+ $_ .= $CJK_ORD[$ord];
+ } else {
+ s/(?<!\Q$CJK_DIG[0]\E)$/$CJK_DIG[0]/o;
+ }
+ }
+ s/\Q$CJK_DIG[0]\E$//o;
+ s/^(\Q$CJK_DIG[0]\E*)\Q$CJK_DIG[1]$CJK_ORD[1]\E/$1$CJK_ORD[1]/o;
+ if(/^$/) {
+ $_ = $CJK_DIG[0];
+ } else {
+ $_ .= $CJK_MAG[$mag];
+ }
+ push @out, $_;
+ }
+ $_ = join '', @out;
+ s/\.//g;
+ s/\Q$CJK_DIG[0]\E+/$CJK_DIG[0]/go;
+ s/\Q$CJK_DIG[0]\E$//o;
+ s/^$/$CJK_DIG[0]/;
+ s/^\Q$CJK_NEG\E$/$CJK_DIG[0]/o;
+ return $_;
+}
+
+BEGIN { _export qw(textOrderHebrew text) }
+my @HEBREW_1 = ('', qw(א ב ג ד ה ו ז ח ט י));
+my @HEBREW_10 = qw(כ ל מ נ ס ע פ צ);
+my @HEBREW_100 = qw(ק ר ש ת);
+sub textOrderHebrew($);
+sub textOrderHebrew($) {
+ my $num = 0+shift;
+ return $num if $num < 0;
+
+ if($num == 0) {
+ return 'אפס';
+ } elsif($num < 11) {
+ return $HEBREW_1[$num];
+ } elsif($num < 20) {
+ my($a,$b) = (10,$num-10);
+ if($b == 5 or $b == 6) {
+ $a--;
+ $b++;
+ }
+ return $HEBREW_1[$a].$HEBREW_1[$b];
+ } elsif($num < 100) {
+ my $ten = int($num / 10);
+ $num %= 10;
+ return $HEBREW_10[$ten - 2].$HEBREW_1[$num];
+ } elsif($num < 500) {
+ my $hnd = int($num / 100);
+ $num %= 100;
+ return $HEBREW_100[$hnd - 1].($num > 0 ? textOrderHebrew($num) : '');
+ } elsif($num < 1000) {
+ return $HEBREW_100[3].textOrderHebrew($num - 400);
+ } else {
+ my @ret;
+ while($num > 0) {
+ my $mod = ($num % 1000);
+ $num = int($num / 1000);
+ push @ret, textOrderHebrew($mod);
+ }
+ return join(' ', @ret);
+ }
+ return '*';
+}
+
+BEGIN { _export qw(textOrderGeorgian text) }
+my @GEORGIAN_1 = ('', qw(Ⴀ Ⴁ Ⴂ Ⴃ Ⴄ Ⴅ Ⴆ Ⴡ Ⴇ));
+my @GEORGIAN_10 = ('', qw(Ⴈ Ⴉ Ⴊ Ⴋ Ⴌ Ⴢ Ⴍ Ⴎ Ⴏ));
+my @GEORGIAN_100 = ('', qw(Ⴐ Ⴑ Ⴒ Ⴓ Ⴣ Ⴔ Ⴕ Ⴖ Ⴗ));
+my @GEORGIAN_1000 = ('', qw(Ⴘ Ⴙ Ⴚ Ⴛ Ⴜ Ⴝ Ⴞ Ⴤ Ⴥ));
+sub textOrderGeorgian($) {
+ my $num = 0+shift;
+ return $num if $num < 1 or $num >= 20000;
+
+ my $ret = '';
+ if($num >= 10000) {
+ $num -= 10000;
+ $ret .= 'Ⴟ';
+ }
+ if($num >= 1000) {
+ my $digit = int($num / 1000);
+ $num %= 1000;
+ $ret .= $GEORGIAN_1000[$digit];
+ }
+ if($num >= 100) {
+ my $digit = int($num / 100);
+ $num %= 100;
+ $ret .= $GEORGIAN_100[$digit];
+ }
+ if($num >= 10) {
+ my $digit = int($num / 10);
+ $num %= 10;
+ $ret .= $GEORGIAN_10[$digit];
+ }
+ if($num >= 1) {
+ $ret .= $GEORGIAN_1[$num];
+ }
+ return $ret;
+}
+
+BEGIN { _export qw(textOrderArmenian text) }
+my @ARMENIAN_1 = ('', qw(Ա Բ Գ Դ Ե Զ Է Ը Թ));
+my @ARMENIAN_10 = ('', qw(Ժ Ի Լ Խ Ծ Կ Հ Ձ Ղ));
+my @ARMENIAN_100 = ('', qw(Ճ Մ Յ Ն Շ Ո Չ Պ Ջ));
+my @ARMENIAN_1000 = ('', qw(Ռ Ս Վ Տ Ր Ց Ւ Փ Ք));
+sub textOrderArmenian($) {
+ my $num = 0+shift;
+ return $num if $num < 1 or $num >= 10000;
+
+ my $ret = '';
+ if($num >= 1000) {
+ my $digit = int($num / 1000);
+ $num %= 1000;
+ $ret .= $ARMENIAN_1000[$digit];
+ }
+ if($num >= 100) {
+ my $digit = int($num / 100);
+ $num %= 100;
+ $ret .= $ARMENIAN_100[$digit];
+ }
+ if($num >= 10) {
+ my $digit = int($num / 10);
+ $num %= 10;
+ $ret .= $ARMENIAN_10[$digit];
+ }
+ if($num >= 1) {
+ $ret .= $ARMENIAN_1[$num];
+ }
+ return $ret;
+}
+
+BEGIN { _export qw(textOrderRoman text) }
+my @ROMAN = map { [ split /:/ ] } qw(1000:M:C 500:D:C 100:C:X 50:L:X 10:X:I 5:V:I 1:I);
+my %ROMAN = map { $_->[1] => $_ } @ROMAN;
+sub textOrderRoman($) {
+ my $num = 0+shift;
+ return $num if $num < 1;
+
+ my $str = "";
+ foreach my $i (@ROMAN) {
+ while($num >= $i->[0]) {
+ $num -= $i->[0];
+ $str .= $i->[1];
+ }
+ if(@$i > 2) {
+ my $j = $ROMAN{$i->[2]};
+ if($num >= ($i->[0] - $j->[0])) {
+ $num -= ($i->[0] - $j->[0]);
+ $str .= $j->[1].$i->[1];
+ }
+ }
+ }
+ return $str;
+}
+
+sub _b10_len($) {
+ my $n = shift;
+ if($n > 0) {
+ return 1+POSIX::floor(log($n)/log(10));
+ }
+ if($n < 0) {
+ return 2+POSIX::floor(log(-$n)/log(10));
+ }
+ return 1;
+}
+
+sub _max($$) {
+ return $_[0] > $_[1] ? $_[0] : $_[1];
+}
+
+BEGIN { _export qw(createListSequence) }
+sub createListSequence($;$$) {
+ my($type,$total,$start) = @_;
+ my @list = parseListType($type);
+ $start = 1 unless defined $start;
+
+ if(@list and $list[0] eq 'ol') {
+ my $type = (@list > 1) ? $list[1] : 'decimal';
+ if($type =~ /^(upper|lower)-(alpha|latin|roman|greek)$/i) {
+ my $func = 'textOrder'.ucfirst(lc($2));
+ my $uc = $1 =~ /^upper$/i;
+ $func =~ s/Latin$/Alpha/;
+ {
+ no strict 'refs';
+ $func = \&{$func};
+ }
+ if($uc) {
+ return sub { $func->($start++).'.' };
+ } else {
+ return sub { lc $func->($start++).'.' };
+ }
+ }
+ if($type =~ /^(hiragana|katakana)(?:-(iroha))?$/i) {
+ my $func = 'textOrder'.ucfirst(lc($1)).(defined $2 ? uc($2) : '');
+ {
+ no strict 'refs';
+ $func = \&{$func};
+ }
+ return sub { $func->($start++).'.' };
+ }
+ if($type =~ /^cjk-ideographic$/i) {
+ return sub { textOrderCJK($start++).'.' };
+ }
+ if($type =~ /^hebrew$/i) {
+ return sub { textOrderHebrew($start++).'.' };
+ }
+ if($type =~ /^georgian$/i) {
+ return sub { textOrderGeorgian($start++).'.' };
+ }
+ if($type =~ /^armenian$/i) {
+ return sub { textOrderArmenian($start++).'.' };
+ }
+ if(defined $total) {
+ my $end = $total + $start - 1;
+ my $len = _max _b10_len $start, _b10_len $end;
+ my $fmt = sprintf '%%%dd.', $len;
+ $fmt =~ s/^%/%0/ if $type =~ /^decimal-leading-zero$/i;
+ return sub { sprintf($fmt,$start++) };
+ } else {
+ return sub { sprintf("%d.",$start++) };
+ }
+ }
+ return sub { '*' };
+}
+
BEGIN {
push @EXPORT_OK, @{$EXPORT_TAGS{ALL}} if exists $EXPORT_TAGS{ALL};
push @EXPORT, @{$EXPORT_TAGS{DEFAULT}} if exists $EXPORT_TAGS{DEFAULT};