use
5.00503;
BEGIN {
eval
q{ use vars qw($VERSION) }
}
$VERSION
=
sprintf
'%d.%02d'
,
q$Revision: 0.75 $
=~ m/(\d+)/xmsg;
BEGIN {
my
$PERL5LIB
= __FILE__;
if
($^O =~ /\A (?: MSWin32 | NetWare | symbian | dos ) \z/oxms) {
$PERL5LIB
=~ s{[^/]*$}{UTF2};
}
else
{
$PERL5LIB
=~ s{[^/]*$}{UTF2};
}
my
@inc
= ();
my
%inc
= ();
for
my
$path
(
$PERL5LIB
,
@INC
) {
if
(not
exists
$inc
{
$path
}) {
push
@inc
,
$path
;
$inc
{
$path
} = 1;
}
}
@INC
=
@inc
;
}
BEGIN {
eval
q{
no warnings qw(redefine);
*utf8::upgrade = sub { CORE::length $_[0] }
;
*utf8::downgrade
=
sub
{ 1 };
*utf8::encode
=
sub
{ };
*utf8::decode
=
sub
{ 1 };
*utf8::is_utf8
=
sub
{ };
*utf8::valid
=
sub
{ 1 };
};
if
($@) {
*utf8::upgrade
=
sub
{ CORE::
length
$_
[0] };
*utf8::downgrade
=
sub
{ 1 };
*utf8::encode
=
sub
{ };
*utf8::decode
=
sub
{ 1 };
*utf8::is_utf8
=
sub
{ };
*utf8::valid
=
sub
{ 1 };
}
}
BEGIN {
my
$genpkg
=
"Symbol::"
;
my
$genseq
= 0;
sub
gensym () {
my
$name
=
"GEN"
.
$genseq
++;
my
$ref
= \*{
$genpkg
.
$name
};
delete
$$genpkg
{
$name
};
$ref
;
}
sub
qualify ($;$) {
my
(
$name
) =
@_
;
if
(!
ref
(
$name
) && (Eutf2::
index
(
$name
,
'::'
) == -1) && (Eutf2::
index
(
$name
,
"'"
) == -1)) {
my
$pkg
;
my
%global
=
map
{
$_
=> 1}
qw(ARGV ARGVOUT ENV INC SIG STDERR STDIN STDOUT)
;
if
(
$name
=~ /^(([^a-z])|(\^[a-z_]+))\z/i ||
$global
{
$name
}) {
$name
=~ s/^\^([a-z_])/
'qq(\c'
.$1.
')'
/eei;
$pkg
=
"main"
;
}
else
{
$pkg
= (
@_
> 1) ?
$_
[1] :
caller
;
}
$name
=
$pkg
.
"::"
.
$name
;
}
$name
;
}
sub
qualify_to_ref ($;$) {
no
strict
qw(refs)
;
return
\*{ qualify
$_
[0],
@_
> 1 ?
$_
[1] :
caller
};
}
}
sub
LOCK_SH() {1}
sub
LOCK_EX() {2}
sub
LOCK_UN() {8}
sub
LOCK_NB() {4}
sub
carp(@);
sub
croak(@);
sub
cluck(@);
sub
confess(@);
my
$__FILE__
= __FILE__;
BEGIN {
if
($^X =~ m/ jperl /oxmsi) {
die
"$0 need perl(not jperl) 5.00503 or later. (\$^X==$^X)"
;
}
}
my
$your_char
=
q{(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x00-\xFF]|[\x00-\xFF]}
;
my
$q_char
=
qr/$your_char/
oxms;
my
%range_tr
= ();
my
$is_shiftjis_family
= 0;
my
$is_eucjp_family
= 0;
BEGIN {
eval
q{ use vars qw($encoding_alias) }
}
if
(0) {
}
elsif
(__PACKAGE__ =~ m/ \b Eusascii \z/oxms) {
%range_tr
= (
1
=> [ [0x00..0xFF],
],
);
$encoding_alias
=
qr/ \b (?: (?:US-?)?ASCII ) \b /
oxmsi;
}
elsif
(__PACKAGE__ =~ m/ \b Elatin1 \z/oxms) {
%range_tr
= (
1
=> [ [0x00..0xFF],
],
);
$encoding_alias
=
qr/ \b (?: ISO[-_ ]?8859-1 | IEC[- ]?8859-1 | Latin-?1 ) \b /
oxmsi;
}
elsif
(__PACKAGE__ =~ m/ \b Elatin2 \z/oxms) {
%range_tr
= (
1
=> [ [0x00..0xFF],
],
);
$encoding_alias
=
qr/ \b (?: ISO[-_ ]?8859-2 | IEC[- ]?8859-2 | Latin-?2 ) \b /
oxmsi;
}
elsif
(__PACKAGE__ =~ m/ \b Elatin3 \z/oxms) {
%range_tr
= (
1
=> [ [0x00..0xFF],
],
);
$encoding_alias
=
qr/ \b (?: ISO[-_ ]?8859-3 | IEC[- ]?8859-3 | Latin-?3 ) \b /
oxmsi;
}
elsif
(__PACKAGE__ =~ m/ \b Elatin4 \z/oxms) {
%range_tr
= (
1
=> [ [0x00..0xFF],
],
);
$encoding_alias
=
qr/ \b (?: ISO[-_ ]?8859-4 | IEC[- ]?8859-4 | Latin-?4 ) \b /
oxmsi;
}
elsif
(__PACKAGE__ =~ m/ \b Ecyrillic \z/oxms) {
%range_tr
= (
1
=> [ [0x00..0xFF],
],
);
$encoding_alias
=
qr/ \b (?: ISO[-_ ]?8859-5 | IEC[- ]?8859-5 | Cyrillic ) \b /
oxmsi;
}
elsif
(__PACKAGE__ =~ m/ \b Egreek \z/oxms) {
%range_tr
= (
1
=> [ [0x00..0xFF],
],
);
$encoding_alias
=
qr/ \b (?: ISO[-_ ]?8859-7 | IEC[- ]?8859-7 | Greek ) \b /
oxmsi;
}
elsif
(__PACKAGE__ =~ m/ \b Elatin5 \z/oxms) {
%range_tr
= (
1
=> [ [0x00..0xFF],
],
);
$encoding_alias
=
qr/ \b (?: ISO[-_ ]?8859-9 | IEC[- ]?8859-9 | Latin-?5 ) \b /
oxmsi;
}
elsif
(__PACKAGE__ =~ m/ \b Elatin6 \z/oxms) {
%range_tr
= (
1
=> [ [0x00..0xFF],
],
);
$encoding_alias
=
qr/ \b (?: ISO[-_ ]?8859-10 | IEC[- ]?8859-10 | Latin-?6 ) \b /
oxmsi;
}
elsif
(__PACKAGE__ =~ m/ \b Elatin7 \z/oxms) {
%range_tr
= (
1
=> [ [0x00..0xFF],
],
);
$encoding_alias
=
qr/ \b (?: ISO[-_ ]?8859-13 | IEC[- ]?8859-13 | Latin-?7 ) \b /
oxmsi;
}
elsif
(__PACKAGE__ =~ m/ \b Elatin8 \z/oxms) {
%range_tr
= (
1
=> [ [0x00..0xFF],
],
);
$encoding_alias
=
qr/ \b (?: ISO[-_ ]?8859-14 | IEC[- ]?8859-14 | Latin-?8 ) \b /
oxmsi;
}
elsif
(__PACKAGE__ =~ m/ \b Elatin9 \z/oxms) {
%range_tr
= (
1
=> [ [0x00..0xFF],
],
);
$encoding_alias
=
qr/ \b (?: ISO[-_ ]?8859-15 | IEC[- ]?8859-15 | Latin-?9 ) \b /
oxmsi;
}
elsif
(__PACKAGE__ =~ m/ \b Elatin10 \z/oxms) {
%range_tr
= (
1
=> [ [0x00..0xFF],
],
);
$encoding_alias
=
qr/ \b (?: ISO[-_ ]?8859-16 | IEC[- ]?8859-16 | Latin-?10 ) \b /
oxmsi;
}
elsif
(__PACKAGE__ =~ m/ \b Ewindows1252 \z/oxms) {
%range_tr
= (
1
=> [ [0x00..0xFF],
],
);
$encoding_alias
=
qr/ \b (?: Windows-?1252 ) \b /
oxmsi;
}
elsif
(__PACKAGE__ =~ m/ \b Eeucjp \z/oxms) {
%range_tr
= (
1
=> [ [0x00..0x8D,0x90..0xA0,0xFF],
],
2
=> [ [0x8E..0x8E],[0xA1..0xDF],
[0xA1..0xFE],[0xA1..0xFE],
],
3
=> [ [0x8F..0x8F],[0xA1..0xFE],[0xA1..0xFE],
],
);
$is_eucjp_family
= 1;
$encoding_alias
=
qr/ \b (?: euc.*jp | jp.*euc | ujis ) \b /
oxmsi;
}
elsif
(__PACKAGE__ =~ m/ \b Eutf2 \z/oxms) {
%range_tr
= (
1
=> [ [0x00..0x7F],
],
2
=> [ [0xC2..0xDF],[0x80..0xBF],
],
3
=> [ [0xE0..0xE0],[0xA0..0xBF],[0x80..0xBF],
[0xE1..0xEC],[0x80..0xBF],[0x80..0xBF],
[0xED..0xED],[0x80..0x9F],[0x80..0xBF],
[0xEE..0xEF],[0x80..0xBF],[0x80..0xBF],
],
4
=> [ [0xF0..0xF0],[0x90..0xBF],[0x80..0xBF],[0x80..0xBF],
[0xF1..0xF3],[0x80..0xBF],[0x80..0xBF],[0x80..0xBF],
[0xF4..0xF4],[0x80..0x8F],[0x80..0xBF],[0x80..0xBF],
],
);
$encoding_alias
=
qr/ \b (?: UTF-8 | utf-8-strict | UTF-?2 ) \b /
oxmsi;
}
elsif
(__PACKAGE__ =~ m/ \b Eoldutf8 \z/oxms) {
%range_tr
= (
1
=> [ [0x00..0x7F],
],
2
=> [ [0xC0..0xDF],[0x80..0xBF],
],
3
=> [ [0xE0..0xEF],[0x80..0xBF],[0x80..0xBF],
],
4
=> [ [0xF0..0xF4],[0x80..0xBF],[0x80..0xBF],[0x80..0xBF],
],
);
$encoding_alias
=
qr/ \b (?: utf8 | CESU-?8 | Modified[ ]?UTF-?8 | Old[ ]?UTF-?8 ) \b /
oxmsi;
}
else
{
croak
"$0 don't know my package name '"
. __PACKAGE__ .
"'"
;
}
sub
import
() {}
sub
unimport() {}
sub
Eutf2::
split
(;$$$);
sub
Eutf2::
tr
($$$$;$);
sub
Eutf2::
chop
(@);
sub
Eutf2::
index
($$;$);
sub
Eutf2::
rindex
($$;$);
sub
Eutf2::capture($);
sub
Eutf2::
chr
(;$);
sub
Eutf2::chr_();
sub
Eutf2::
glob
($);
sub
Eutf2::glob_();
sub
UTF2::
ord
(;$);
sub
UTF2::ord_();
sub
UTF2::
reverse
(@);
sub
UTF2::
length
(;$);
sub
UTF2::
substr
($$;$$);
sub
UTF2::
index
($$;$);
sub
UTF2::
rindex
($$;$);
if
($^O =~ /\A (?: MSWin32 | NetWare | symbian | dos ) \z/oxms) {
if
(
$ENV
{
'ComSpec'
} =~ / (?: COMMAND\.COM | CMD\.EXE ) \z /oxmsi) {
my
@argv
= ();
for
(
@ARGV
) {
if
(m/\A
' ((?:$q_char)*) '
\z/oxms) {
push
@argv
, $1;
}
elsif
(m/\A (?:
$q_char
)*? [*?] /oxms and (
my
@glob
= Eutf2::
glob
(
$_
))) {
push
@argv
,
@glob
;
}
else
{
push
@argv
,
$_
;
}
}
@ARGV
=
@argv
;
}
}
sub
Eutf2::
split
(;$$$) {
my
$pattern
=
$_
[0];
my
$string
=
$_
[1];
my
$limit
=
$_
[2];
if
(not
defined
$string
) {
if
(
defined
$_
) {
$string
=
$_
;
}
else
{
$string
=
''
;
}
}
my
@split
= ();
if
(
$string
eq
''
) {
if
(
wantarray
) {
return
@split
;
}
else
{
carp
"$0: Use of implicit split to \@_ is deprecated"
if
$^W;
@_
=
@split
;
return
scalar
@_
;
}
}
if
((not
defined
$limit
) or (
$limit
<= 0)) {
if
((not
defined
$pattern
) or (
$pattern
eq
' '
)) {
$string
=~ s/ \A \s+ //oxms;
while
(
$string
=~ s/\A((?:
$q_char
)*?)\s+//m) {
local
$@;
for
(
my
$digit
=1;
eval
"defined(\$$digit)"
;
$digit
++) {
push
@split
,
eval
'$'
.
$digit
;
}
}
}
elsif
(
''
=~ m/ \A
$pattern
\z /xms) {
while
(
$string
=~ s/\A((?:
$q_char
)+?)
$pattern
//m) {
local
$@;
for
(
my
$digit
=1;
eval
"defined(\$$digit)"
;
$digit
++) {
push
@split
,
eval
'$'
.
$digit
;
}
}
}
else
{
while
(
$string
=~ s/\A((?:
$q_char
)*?)
$pattern
//m) {
local
$@;
for
(
my
$digit
=1;
eval
"defined(\$$digit)"
;
$digit
++) {
push
@split
,
eval
'$'
.
$digit
;
}
}
}
}
else
{
if
((not
defined
$pattern
) or (
$pattern
eq
' '
)) {
$string
=~ s/ \A \s+ //oxms;
while
((--
$limit
> 0) and (CORE::
length
(
$string
) > 0)) {
if
(
$string
=~ s/\A((?:
$q_char
)*?)\s+//m) {
local
$@;
for
(
my
$digit
=1;
eval
"defined(\$$digit)"
;
$digit
++) {
push
@split
,
eval
'$'
.
$digit
;
}
}
}
}
elsif
(
''
=~ m/ \A
$pattern
\z /xms) {
while
((--
$limit
> 0) and (CORE::
length
(
$string
) > 0)) {
if
(
$string
=~ s/\A((?:
$q_char
)+?)
$pattern
//m) {
local
$@;
for
(
my
$digit
=1;
eval
"defined(\$$digit)"
;
$digit
++) {
push
@split
,
eval
'$'
.
$digit
;
}
}
}
}
else
{
while
((--
$limit
> 0) and (CORE::
length
(
$string
) > 0)) {
if
(
$string
=~ s/\A((?:
$q_char
)*?)
$pattern
//m) {
local
$@;
for
(
my
$digit
=1;
eval
"defined(\$$digit)"
;
$digit
++) {
push
@split
,
eval
'$'
.
$digit
;
}
}
}
}
}
push
@split
,
$string
;
if
((not
defined
$limit
) or (
$limit
== 0)) {
while
((
scalar
(
@split
) >= 1) and (
$split
[-1] eq
''
)) {
pop
@split
;
}
}
if
(
wantarray
) {
return
@split
;
}
else
{
carp
"$0: Use of implicit split to \@_ is deprecated"
if
$^W;
@_
=
@split
;
return
scalar
@_
;
}
}
sub
Eutf2::
tr
($$$$;$) {
my
$bind_operator
=
$_
[1];
my
$searchlist
=
$_
[2];
my
$replacementlist
=
$_
[3];
my
$modifier
=
$_
[4] ||
''
;
if
(
$modifier
=~ m/r/oxms) {
if
(
$bind_operator
=~ m/ !~ /oxms) {
croak
"$0: Using !~ with tr///r doesn't make sense"
;
}
}
my
@char
=
$_
[0] =~ m/\G (
$q_char
) /oxmsg;
my
@searchlist
= _charlist_tr(
$searchlist
);
my
@replacementlist
= _charlist_tr(
$replacementlist
);
my
%tr
= ();
for
(
my
$i
=0;
$i
<=
$#searchlist
;
$i
++) {
if
(not
exists
$tr
{
$searchlist
[
$i
]}) {
if
(
defined
$replacementlist
[
$i
] and (
$replacementlist
[
$i
] ne
''
)) {
$tr
{
$searchlist
[
$i
]} =
$replacementlist
[
$i
];
}
elsif
(
$modifier
=~ m/d/oxms) {
$tr
{
$searchlist
[
$i
]} =
''
;
}
elsif
(
defined
$replacementlist
[-1] and (
$replacementlist
[-1] ne
''
)) {
$tr
{
$searchlist
[
$i
]} =
$replacementlist
[-1];
}
else
{
$tr
{
$searchlist
[
$i
]} =
$searchlist
[
$i
];
}
}
}
my
$tr
= 0;
my
$replaced
=
''
;
if
(
$modifier
=~ m/c/oxms) {
while
(
defined
(
my
$char
=
shift
@char
)) {
if
(not
exists
$tr
{
$char
}) {
if
(
defined
$replacementlist
[0]) {
$replaced
.=
$replacementlist
[0];
}
$tr
++;
if
(
$modifier
=~ m/s/oxms) {
while
(
@char
and (not
exists
$tr
{
$char
[0]})) {
shift
@char
;
$tr
++;
}
}
}
else
{
$replaced
.=
$char
;
}
}
}
else
{
while
(
defined
(
my
$char
=
shift
@char
)) {
if
(
exists
$tr
{
$char
}) {
$replaced
.=
$tr
{
$char
};
$tr
++;
if
(
$modifier
=~ m/s/oxms) {
while
(
@char
and (
exists
$tr
{
$char
[0]}) and (
$tr
{
$char
[0]} eq
$tr
{
$char
})) {
shift
@char
;
$tr
++;
}
}
}
else
{
$replaced
.=
$char
;
}
}
}
if
(
$modifier
=~ m/r/oxms) {
return
$replaced
;
}
else
{
$_
[0] =
$replaced
;
if
(
$bind_operator
=~ m/ !~ /oxms) {
return
not
$tr
;
}
else
{
return
$tr
;
}
}
}
sub
Eutf2::
chop
(@) {
my
$chop
;
if
(
@_
== 0) {
my
@char
= m/\G (
$q_char
) /oxmsg;
$chop
=
pop
@char
;
$_
=
join
''
,
@char
;
}
else
{
for
(
@_
) {
my
@char
= m/\G (
$q_char
) /oxmsg;
$chop
=
pop
@char
;
$_
=
join
''
,
@char
;
}
}
return
$chop
;
}
sub
Eutf2::
index
($$;$) {
my
(
$str
,
$substr
,
$position
) =
@_
;
$position
||= 0;
my
$pos
= 0;
while
(
$pos
< CORE::
length
(
$str
)) {
if
(CORE::
substr
(
$str
,
$pos
,CORE::
length
(
$substr
)) eq
$substr
) {
if
(
$pos
>=
$position
) {
return
$pos
;
}
}
if
(CORE::
substr
(
$str
,
$pos
) =~ m/\A (
$q_char
) /oxms) {
$pos
+= CORE::
length
($1);
}
else
{
$pos
+= 1;
}
}
return
-1;
}
sub
Eutf2::
rindex
($$;$) {
my
(
$str
,
$substr
,
$position
) =
@_
;
$position
||= CORE::
length
(
$str
) - 1;
my
$pos
= 0;
my
$rindex
= -1;
while
((
$pos
< CORE::
length
(
$str
)) and (
$pos
<=
$position
)) {
if
(CORE::
substr
(
$str
,
$pos
,CORE::
length
(
$substr
)) eq
$substr
) {
$rindex
=
$pos
;
}
if
(CORE::
substr
(
$str
,
$pos
) =~ m/\A (
$q_char
) /oxms) {
$pos
+= CORE::
length
($1);
}
else
{
$pos
+= 1;
}
}
return
$rindex
;
}
{
sub
Eutf2::capture($) {
return
$_
[0];
}
}
my
@chars1
= ();
sub
chars1 {
if
(
@chars1
) {
return
@chars1
;
}
if
(
exists
$range_tr
{1}) {
my
@ranges
= @{
$range_tr
{1} };
while
(
my
@range
=
splice
(
@ranges
,0,1)) {
for
my
$oct0
(@{
$range
[0]}) {
push
@chars1
,
pack
'C'
,
$oct0
;
}
}
}
return
@chars1
;
}
my
@chars2
= ();
sub
chars2 {
if
(
@chars2
) {
return
@chars2
;
}
if
(
exists
$range_tr
{2}) {
my
@ranges
= @{
$range_tr
{2} };
while
(
my
@range
=
splice
(
@ranges
,0,2)) {
for
my
$oct0
(@{
$range
[0]}) {
for
my
$oct1
(@{
$range
[1]}) {
push
@chars2
,
pack
'CC'
,
$oct0
,
$oct1
;
}
}
}
}
return
@chars2
;
}
my
@chars3
= ();
sub
chars3 {
if
(
@chars3
) {
return
@chars3
;
}
if
(
exists
$range_tr
{3}) {
my
@ranges
= @{
$range_tr
{3} };
while
(
my
@range
=
splice
(
@ranges
,0,3)) {
for
my
$oct0
(@{
$range
[0]}) {
for
my
$oct1
(@{
$range
[1]}) {
for
my
$oct2
(@{
$range
[2]}) {
push
@chars3
,
pack
'CCC'
,
$oct0
,
$oct1
,
$oct2
;
}
}
}
}
}
return
@chars3
;
}
my
@chars4
= ();
sub
chars4 {
if
(
@chars4
) {
return
@chars4
;
}
if
(
exists
$range_tr
{4}) {
my
@ranges
= @{
$range_tr
{4} };
while
(
my
@range
=
splice
(
@ranges
,0,4)) {
for
my
$oct0
(@{
$range
[0]}) {
for
my
$oct1
(@{
$range
[1]}) {
for
my
$oct2
(@{
$range
[2]}) {
for
my
$oct3
(@{
$range
[3]}) {
push
@chars4
,
pack
'CCCC'
,
$oct0
,
$oct1
,
$oct2
,
$oct3
;
}
}
}
}
}
}
return
@chars4
;
}
my
@minchar
= ();
sub
minchar {
if
(
defined
$minchar
[
$_
[0]]) {
return
$minchar
[
$_
[0]];
}
$minchar
[
$_
[0]] = (&{(
sub
{}, \
&chars1
, \
&chars2
, \
&chars3
, \
&chars4
)[
$_
[0]]})[0];
}
my
@maxchar
= ();
sub
maxchar {
if
(
defined
$maxchar
[
$_
[0]]) {
return
$maxchar
[
$_
[0]];
}
$maxchar
[
$_
[0]] = (&{(
sub
{}, \
&chars1
, \
&chars2
, \
&chars3
, \
&chars4
)[
$_
[0]]})[-1];
}
sub
_charlist_tr {
local
$_
=
shift
@_
;
my
@char
= ();
while
(not m/\G \z/oxmsgc) {
if
(m/\G (\\0?55|\\x2[Dd]|\\-) /oxmsgc) {
push
@char
,
'\-'
;
}
elsif
(m/\G \\ ([0-7]{2,3}) /oxmsgc) {
push
@char
, CORE::
chr
(
oct
$1);
}
elsif
(m/\G \\x ([0-9A-Fa-f]{1,2}) /oxmsgc) {
push
@char
, CORE::
chr
(
hex
$1);
}
elsif
(m/\G \\c ([\x40-\x5F]) /oxmsgc) {
push
@char
, CORE::
chr
(CORE::
ord
($1) & 0x1F);
}
elsif
(m/\G (\\ [0nrtfbae]) /oxmsgc) {
push
@char
, {
'\0'
=>
"\0"
,
'\n'
=>
"\n"
,
'\r'
=>
"\r"
,
'\t'
=>
"\t"
,
'\f'
=>
"\f"
,
'\b'
=>
"\x08"
,
'\a'
=>
"\a"
,
'\e'
=>
"\e"
,
}->{$1};
}
elsif
(m/\G \\ (
$q_char
) /oxmsgc) {
push
@char
, $1;
}
elsif
(m/\G (
$q_char
) /oxmsgc) {
push
@char
, $1;
}
}
@char
=
join
(
''
,
@char
) =~ m/\G (\\-|
$q_char
) /oxmsg;
my
@i
= ();
for
my
$i
(0 ..
$#char
) {
if
(
$char
[
$i
] eq
'\-'
) {
$char
[
$i
] =
'-'
;
}
elsif
(
$char
[
$i
] eq
'-'
) {
if
((0 <
$i
) and (
$i
<
$#char
)) {
push
@i
,
$i
;
}
}
}
for
my
$i
(CORE::
reverse
@i
) {
my
@range
= ();
if
((
length
(
$char
[
$i
-1]) >
length
(
$char
[
$i
+1])) or (
$char
[
$i
-1] gt
$char
[
$i
+1])) {
croak
"$0: invalid [] range \"\\x"
.
unpack
(
'H*'
,
$char
[
$i
-1]) .
'-\\x'
.
unpack
(
'H*'
,
$char
[
$i
+1]) .
'" in regexp'
;
}
if
(
length
(
$char
[
$i
-1]) == 1) {
if
(
length
(
$char
[
$i
+1]) == 1) {
push
@range
,
grep
{(
$char
[
$i
-1] le
$_
) and (
$_
le
$char
[
$i
+1])}
&chars1
();
}
elsif
(
length
(
$char
[
$i
+1]) == 2) {
push
@range
,
grep
{
$char
[
$i
-1] le
$_
}
&chars1
();
push
@range
,
grep
{
$_
le
$char
[
$i
+1]}
&chars2
();
}
elsif
(
length
(
$char
[
$i
+1]) == 3) {
push
@range
,
grep
{
$char
[
$i
-1] le
$_
}
&chars1
();
push
@range
,
&chars2
();
push
@range
,
grep
{
$_
le
$char
[
$i
+1]}
&chars3
();
}
elsif
(
length
(
$char
[
$i
+1]) == 4) {
push
@range
,
grep
{
$char
[
$i
-1] le
$_
}
&chars1
();
push
@range
,
&chars2
();
push
@range
,
&chars3
();
push
@range
,
grep
{
$_
le
$char
[
$i
+1]}
&chars4
();
}
}
elsif
(
length
(
$char
[
$i
-1]) == 2) {
if
(
length
(
$char
[
$i
+1]) == 2) {
push
@range
,
grep
{(
$char
[
$i
-1] le
$_
) and (
$_
le
$char
[
$i
+1])}
&chars2
();
}
elsif
(
length
(
$char
[
$i
+1]) == 3) {
push
@range
,
grep
{
$char
[
$i
-1] le
$_
}
&chars2
();
push
@range
,
grep
{
$_
le
$char
[
$i
+1]}
&chars3
();
}
elsif
(
length
(
$char
[
$i
+1]) == 4) {
push
@range
,
grep
{
$char
[
$i
-1] le
$_
}
&chars2
();
push
@range
,
&chars3
();
push
@range
,
grep
{
$_
le
$char
[
$i
+1]}
&chars4
();
}
}
elsif
(
length
(
$char
[
$i
-1]) == 3) {
if
(
length
(
$char
[
$i
+1]) == 3) {
push
@range
,
grep
{(
$char
[
$i
-1] le
$_
) and (
$_
le
$char
[
$i
+1])}
&chars3
();
}
elsif
(
length
(
$char
[
$i
+1]) == 4) {
push
@range
,
grep
{
$char
[
$i
-1] le
$_
}
&chars3
();
push
@range
,
grep
{
$_
le
$char
[
$i
+1]}
&chars4
();
}
}
elsif
(
length
(
$char
[
$i
-1]) == 4) {
if
(
length
(
$char
[
$i
+1]) == 4) {
push
@range
,
grep
{(
$char
[
$i
-1] le
$_
) and (
$_
le
$char
[
$i
+1])}
&chars4
();
}
}
splice
@char
,
$i
-1, 3,
@range
;
}
return
@char
;
}
sub
_octets {
my
$modifier
=
pop
@_
;
my
$length
=
shift
;
my
(
$a
) =
unpack
'C'
,
$_
[0];
my
(
$z
) =
unpack
'C'
,
$_
[1];
if
(
$length
== 1) {
if
(((
caller
(1))[3] ne
'Eutf2::_octets'
) and (
$modifier
=~ m/i/oxms)) {
if
(
$a
==
$z
) {
return
sprintf
(
'(?i:\x%02X)'
,
$a
);
}
elsif
((
$a
+1) ==
$z
) {
return
sprintf
(
'(?i:[\x%02X\x%02X])'
,
$a
,
$z
);
}
else
{
return
sprintf
(
'(?i:[\x%02X-\x%02X])'
,
$a
,
$z
);
}
}
else
{
if
(
$a
==
$z
) {
return
sprintf
(
'\x%02X'
,
$a
);
}
elsif
((
$a
+1) ==
$z
) {
return
sprintf
(
'[\x%02X\x%02X]'
,
$a
,
$z
);
}
else
{
return
sprintf
(
'[\x%02X-\x%02X]'
,
$a
,
$z
);
}
}
}
elsif
((
$length
== 2) and
$is_shiftjis_family
and (
$a
<= 0x9F) and (0xE0 <=
$z
)) {
my
(
undef
,
$a2
) =
unpack
'CC'
,
$_
[0];
my
(
undef
,
$z2
) =
unpack
'CC'
,
$_
[1];
my
$octets1
;
my
$octets2
;
if
(
$a
== 0x9F) {
$octets1
=
sprintf
(
'\x%02X[\x%02X-\xFF]'
, 0x9F,
$a2
);
}
elsif
((
$a
+1) == 0x9F) {
$octets1
=
sprintf
(
'\x%02X[\x%02X-\xFF]|\x%02X[\x00-\xFF]'
,
$a
,
$a2
,
$a
+1);
}
elsif
((
$a
+2) == 0x9F) {
$octets1
=
sprintf
(
'\x%02X[\x%02X-\xFF]|[\x%02X\x%02X][\x00-\xFF]'
,
$a
,
$a2
,
$a
+1,
$a
+2);
}
else
{
$octets1
=
sprintf
(
'\x%02X[\x%02X-\xFF]|[\x%02X-\x%02X][\x00-\xFF]'
,
$a
,
$a2
,
$a
+1,
$a
+2);
}
if
(
$z
== 0xE0) {
$octets2
=
sprintf
(
'\x%02X[\x00-\x%02X]'
,
$z
,
$z2
);
}
elsif
((
$z
-1) == 0xE0) {
$octets2
=
sprintf
(
'\x%02X[\x00-\xFF]|\x%02X[\x00-\x%02X]'
,
$z
-1,
$z
,
$z2
);
}
elsif
((
$z
-2) == 0xE0) {
$octets2
=
sprintf
(
'[\x%02X\x%02X][\x00-\xFF]|\x%02X[\x00X-\x%02X]'
,
$z
-2,
$z
-1,
$z
,
$z2
);
}
else
{
$octets2
=
sprintf
(
'[\x%02X-\x%02X][\x00-\xFF]|\x%02X[\x00-\x%02X]'
, 0xE0,
$z
-1,
$z
,
$z2
);
}
return
"(?:$octets1|$octets2)"
;
}
elsif
((
$length
== 2) and
$is_eucjp_family
and (
$a
== 0x8E) and (0xA1 <=
$z
)) {
my
(
undef
,
$a2
) =
unpack
'CC'
,
$_
[0];
my
(
undef
,
$z2
) =
unpack
'CC'
,
$_
[1];
my
$octets1
;
my
$octets2
;
$octets1
=
sprintf
(
'\x%02X[\x%02X-\xFF]'
, 0x8E,
$a2
);
if
(
$z
== 0xA1) {
$octets2
=
sprintf
(
'\x%02X[\x00-\x%02X]'
,
$z
,
$z2
);
}
elsif
((
$z
-1) == 0xA1) {
$octets2
=
sprintf
(
'\x%02X[\x00-\xFF]|\x%02X[\x00-\x%02X]'
,
$z
-1,
$z
,
$z2
);
}
elsif
((
$z
-2) == 0xA1) {
$octets2
=
sprintf
(
'[\x%02X\x%02X][\x00-\xFF]|\x%02X[\x00X-\x%02X]'
,
$z
-2,
$z
-1,
$z
,
$z2
);
}
else
{
$octets2
=
sprintf
(
'[\x%02X-\x%02X][\x00-\xFF]|\x%02X[\x00-\x%02X]'
, 0xA1,
$z
-1,
$z
,
$z2
);
}
return
"(?:$octets1|$octets2)"
;
}
else
{
my
(
undef
,
$aa
) =
unpack
'Ca*'
,
$_
[0];
my
(
undef
,
$zz
) =
unpack
'Ca*'
,
$_
[1];
if
(
$a
==
$z
) {
return
'(?:'
.
join
(
'|'
,
sprintf
(
'\x%02X%s'
,
$a
, _octets(
$length
-1,
$aa
,
$zz
,
$modifier
)),
) .
')'
;
}
elsif
((
$a
+1) ==
$z
) {
return
'(?:'
.
join
(
'|'
,
sprintf
(
'\x%02X%s'
,
$a
, _octets(
$length
-1,
$aa
,
&maxchar
(
$length
-1),
$modifier
)),
sprintf
(
'\x%02X%s'
,
$z
, _octets(
$length
-1,
&minchar
(
$length
-1),
$zz
,
$modifier
)),
) .
')'
;
}
elsif
((
$a
+2) ==
$z
) {
return
'(?:'
.
join
(
'|'
,
sprintf
(
'\x%02X%s'
,
$a
, _octets(
$length
-1,
$aa
,
&maxchar
(
$length
-1),
$modifier
)),
sprintf
(
'\x%02X%s'
,
$a
+1, _octets(
$length
-1,
&minchar
(
$length
-1),
&maxchar
(
$length
-1),
$modifier
)),
sprintf
(
'\x%02X%s'
,
$z
, _octets(
$length
-1,
&minchar
(
$length
-1),
$zz
,
$modifier
)),
) .
')'
;
}
elsif
((
$a
+3) ==
$z
) {
return
'(?:'
.
join
(
'|'
,
sprintf
(
'\x%02X%s'
,
$a
, _octets(
$length
-1,
$aa
,
&maxchar
(
$length
-1),
$modifier
)),
sprintf
(
'[\x%02X\x%02X]%s'
,
$a
+1,
$z
-1, _octets(
$length
-1,
&minchar
(
$length
-1),
&maxchar
(
$length
-1),
$modifier
)),
sprintf
(
'\x%02X%s'
,
$z
, _octets(
$length
-1,
&minchar
(
$length
-1),
$zz
,
$modifier
)),
) .
')'
;
}
else
{
return
'(?:'
.
join
(
'|'
,
sprintf
(
'\x%02X%s'
,
$a
, _octets(
$length
-1,
$aa
,
&maxchar
(
$length
-1),
$modifier
)),
sprintf
(
'[\x%02X-\x%02X]%s'
,
$a
+1,
$z
-1, _octets(
$length
-1,
&minchar
(
$length
-1),
&maxchar
(
$length
-1),
$modifier
)),
sprintf
(
'\x%02X%s'
,
$z
, _octets(
$length
-1,
&minchar
(
$length
-1),
$zz
,
$modifier
)),
) .
')'
;
}
}
}
sub
_charlist {
my
$modifier
=
pop
@_
;
my
@char
=
@_
;
for
(
my
$i
=0;
$i
<=
$#char
;
$i
++) {
if
(
$char
[
$i
] eq
'-'
) {
if
((0 <
$i
) and (
$i
<
$#char
)) {
$char
[
$i
] =
'...'
;
}
}
elsif
(
$char
[
$i
] =~ m/\A \\ ([0-7]{2,3}) \z/oxms) {
$char
[
$i
] = CORE::
chr
oct
$1;
}
elsif
(
$char
[
$i
] =~ m/\A \\x ([0-9A-Fa-f]{1,2}) \z/oxms) {
$char
[
$i
] = CORE::
chr
hex
$1;
}
elsif
(
$char
[
$i
] =~ m/\A \\c ([\x40-\x5F]) \z/oxms) {
$char
[
$i
] = CORE::
chr
(CORE::
ord
($1) & 0x1F);
}
elsif
(
$char
[
$i
] =~ m/\A (\\ [0nrtfbaedDhHsSvVwW]) \z/oxms) {
$char
[
$i
] = {
'\0'
=>
"\0"
,
'\n'
=>
"\n"
,
'\r'
=>
"\r"
,
'\t'
=>
"\t"
,
'\f'
=>
"\f"
,
'\b'
=>
"\x08"
,
'\a'
=>
"\a"
,
'\e'
=>
"\e"
,
'\d'
=>
'[0-9]'
,
'\s'
=>
'[\x09\x0A\x0C\x0D\x20]'
,
'\w'
=>
'[0-9A-Z_a-z]'
,
'\D'
=>
'(?:(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x00-\xFF]|[^0-9])'
,
'\S'
=>
'(?:(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x00-\xFF]|[^\x09\x0A\x0C\x0D\x20])'
,
'\W'
=>
'(?:(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x00-\xFF]|[^0-9A-Z_a-z])'
,
'\H'
=>
'(?:(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x00-\xFF]|[^\x09\x20])'
,
'\V'
=>
'(?:(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x00-\xFF]|[^\x0C\x0A\x0D])'
,
'\h'
=>
'[\x09\x20]'
,
'\v'
=>
'[\x0C\x0A\x0D]'
,
}->{$1};
}
elsif
(
$char
[
$i
] =~ m/\A \\ (
$q_char
) \z/oxms) {
$char
[
$i
] = $1;
}
}
my
@singleoctet
= ();
my
@charlist
= ();
for
(
my
$i
=0;
$i
<=
$#char
; ) {
if
(
defined
(
$char
[
$i
+1]) and (
$char
[
$i
+1] eq
'...'
)) {
$i
+= 1;
next
;
}
elsif
(
$char
[
$i
] eq
'...'
) {
if
((
length
(
$char
[
$i
-1]) >
length
(
$char
[
$i
+1])) or (
$char
[
$i
-1] gt
$char
[
$i
+1])) {
croak
"$0: invalid [] range \"\\x"
.
unpack
(
'H*'
,
$char
[
$i
-1]) .
'-\\x'
.
unpack
(
'H*'
,
$char
[
$i
+1]) .
'" in regexp'
;
}
if
((
length
(
$char
[
$i
-1]) == 1) and (
length
(
$char
[
$i
+1]) == 1) and (
$modifier
!~ m/i/oxms)) {
my
$a
=
unpack
'C'
,
$char
[
$i
-1];
my
$z
=
unpack
'C'
,
$char
[
$i
+1];
if
(
$a
==
$z
) {
push
@singleoctet
,
sprintf
(
'\x%02X'
,
$a
);
}
elsif
((
$a
+1) ==
$z
) {
push
@singleoctet
,
sprintf
(
'\x%02X\x%02X'
,
$a
,
$z
);
}
else
{
push
@singleoctet
,
sprintf
(
'\x%02X-\x%02X'
,
$a
,
$z
);
}
}
elsif
(
length
(
$char
[
$i
-1]) ==
length
(
$char
[
$i
+1])) {
push
@charlist
, _octets(
length
(
$char
[
$i
-1]),
$char
[
$i
-1],
$char
[
$i
+1],
$modifier
);
}
elsif
(
length
(
$char
[
$i
-1]) == 1) {
if
(
length
(
$char
[
$i
+1]) == 2) {
push
@charlist
,
_octets(1,
$char
[
$i
-1],
&maxchar
(1),
$modifier
),
_octets(2,
&minchar
(2),
$char
[
$i
+1],
$modifier
);
}
elsif
(
length
(
$char
[
$i
+1]) == 3) {
push
@charlist
,
_octets(1,
$char
[
$i
-1],
&maxchar
(1),
$modifier
),
_octets(2,
&minchar
(2),
&maxchar
(2),
$modifier
),
_octets(3,
&minchar
(3),
$char
[
$i
+1],
$modifier
);
}
elsif
(
length
(
$char
[
$i
+1]) == 4) {
push
@charlist
,
_octets(1,
$char
[
$i
-1],
&maxchar
(1),
$modifier
),
_octets(2,
&minchar
(2),
&maxchar
(2),
$modifier
),
_octets(3,
&minchar
(3),
&maxchar
(3),
$modifier
),
_octets(4,
&minchar
(4),
$char
[
$i
+1],
$modifier
);
}
}
elsif
(
length
(
$char
[
$i
-1]) == 2) {
if
(
length
(
$char
[
$i
+1]) == 3) {
push
@charlist
,
_octets(2,
$char
[
$i
-1],
&maxchar
(2),
$modifier
),
_octets(3,
&minchar
(3),
$char
[
$i
+1],
$modifier
);
}
elsif
(
length
(
$char
[
$i
+1]) == 4) {
push
@charlist
,
_octets(2,
$char
[
$i
-1],
&maxchar
(2),
$modifier
),
_octets(3,
&minchar
(3),
&maxchar
(3),
$modifier
),
_octets(4,
&minchar
(4),
$char
[
$i
+1],
$modifier
);
}
}
elsif
(
length
(
$char
[
$i
-1]) == 3) {
if
(
length
(
$char
[
$i
+1]) == 4) {
push
@charlist
,
_octets(3,
$char
[
$i
-1],
&maxchar
(3),
$modifier
),
_octets(4,
&minchar
(4),
$char
[
$i
+1],
$modifier
);
}
}
else
{
croak
"$0: invalid [] range \"\\x"
.
unpack
(
'H*'
,
$char
[
$i
-1]) .
'-\\x'
.
unpack
(
'H*'
,
$char
[
$i
+1]) .
'" in regexp'
;
}
$i
+= 2;
}
elsif
(
$char
[
$i
] =~ m/\A [\x00-\xFF] \z/oxms) {
if
(
$modifier
=~ m/i/oxms) {
my
$uc
=
uc
(
$char
[
$i
]);
my
$lc
=
lc
(
$char
[
$i
]);
if
(
$uc
ne
$lc
) {
push
@singleoctet
,
$uc
,
$lc
;
}
else
{
push
@singleoctet
,
$char
[
$i
];
}
}
else
{
push
@singleoctet
,
$char
[
$i
];
}
$i
+= 1;
}
elsif
(
$char
[
$i
] =~ m/\A (?: \\h ) \z/oxms) {
push
@singleoctet
,
"\t"
,
"\x20"
;
$i
+= 1;
}
elsif
(
$char
[
$i
] =~ m/\A (?: \\v ) \z/oxms) {
push
@singleoctet
,
"\f"
,
"\n"
,
"\r"
;
$i
+= 1;
}
elsif
(
$char
[
$i
] =~ m/\A (?: [\x00-\xFF] | \\d | \\s | \\w ) \z/oxms) {
push
@singleoctet
,
$char
[
$i
];
$i
+= 1;
}
else
{
push
@charlist
,
$char
[
$i
];
$i
+= 1;
}
}
for
(
@singleoctet
) {
if
(m/\A \n \z/oxms) {
$_
=
'\n'
;
}
elsif
(m/\A \r \z/oxms) {
$_
=
'\r'
;
}
elsif
(m/\A ([\x00-\x20\x7F-\xFF]) \z/oxms) {
$_
=
sprintf
(
'\x%02X'
, CORE::
ord
$1);
}
elsif
(m/\A [\x00-\xFF] \z/oxms) {
$_
=
quotemeta
$_
;
}
}
return
\
@singleoctet
, \
@charlist
;
}
sub
charlist_qr {
my
$modifier
=
pop
@_
;
my
@char
=
@_
;
my
(
$singleoctet
,
$charlist
) = _charlist(
@char
,
$modifier
);
my
@singleoctet
=
@$singleoctet
;
my
@charlist
=
@$charlist
;
if
(
scalar
(
@singleoctet
) == 0) {
}
elsif
(
scalar
(
@singleoctet
) >= 2) {
push
@charlist
,
'['
.
join
(
''
,
@singleoctet
) .
']'
;
}
elsif
(
$singleoctet
[0] =~ m/ . - . /oxms) {
push
@charlist
,
'['
.
$singleoctet
[0] .
']'
;
}
else
{
push
@charlist
,
$singleoctet
[0];
}
if
(
scalar
(
@charlist
) >= 2) {
return
'(?:'
.
join
(
'|'
,
@charlist
) .
')'
;
}
else
{
return
$charlist
[0];
}
}
sub
charlist_not_qr {
my
$modifier
=
pop
@_
;
my
@char
=
@_
;
my
(
$singleoctet
,
$charlist
) = _charlist(
@char
,
$modifier
);
my
@singleoctet
=
@$singleoctet
;
my
@charlist
=
@$charlist
;
if
(
scalar
(
@charlist
) >= 1) {
if
(
scalar
(
@singleoctet
) >= 1) {
return
'(?!'
.
join
(
'|'
,
@charlist
) .
')(?:(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x00-\xFF]|[^'
.
join
(
''
,
@singleoctet
) .
'])'
;
}
else
{
return
'(?!'
.
join
(
'|'
,
@charlist
) .
")(?:$your_char)"
;
}
}
else
{
if
(
scalar
(
@singleoctet
) >= 1) {
return
'(?:(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x00-\xFF]|[^'
.
join
(
''
,
@singleoctet
) .
'])'
;
}
else
{
return
"(?:$your_char)"
;
}
}
}
sub
Eutf2::
chr
(;$) {
my
$c
=
@_
?
$_
[0] :
$_
;
if
(
$c
== 0x00) {
return
"\x00"
;
}
else
{
my
@chr
= ();
while
(
$c
> 0) {
unshift
@chr
, (
$c
% 0x100);
$c
=
int
(
$c
/ 0x100);
}
return
pack
'C*'
,
@chr
;
}
}
sub
Eutf2::chr_() {
my
$c
=
$_
;
if
(
$c
== 0x00) {
return
"\x00"
;
}
else
{
my
@chr
= ();
while
(
$c
> 0) {
unshift
@chr
, (
$c
% 0x100);
$c
=
int
(
$c
/ 0x100);
}
return
pack
'C*'
,
@chr
;
}
}
sub
Eutf2::
glob
($) {
return
_dosglob(
@_
);
}
sub
Eutf2::glob_() {
return
_dosglob();
}
my
%iter
;
my
%entries
;
sub
_dosglob {
my
(
$expr
,
$cxix
) =
@_
;
$expr
=
$_
if
not
defined
$expr
;
if
($^O =~ /\A (?: MSWin32 | NetWare | symbian | dos ) \z/oxms) {
$expr
=~ s{ \A ~ (?= [^/\\] ) }
{
$ENV
{
'HOME'
} ||
$ENV
{
'USERPROFILE'
} ||
"$ENV{'HOMEDRIVE'}$ENV{'HOMEPATH'}"
}oxmse;
}
else
{
$expr
=~ s{ \A ~ ( (?:(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x00-\xFF]|[^/])* ) }
{ $1 ? (
getpwnam
($1))[7] : (
$ENV
{
'HOME'
} ||
$ENV
{
'LOGDIR'
} || (
getpwuid
($<))[7]) }oxmse;
}
$cxix
=
'_G_'
if
not
defined
$cxix
;
$iter
{
$cxix
} = 0
if
not
exists
$iter
{
$cxix
};
if
(
$iter
{
$cxix
} == 0) {
$entries
{
$cxix
} = [ _do_glob(1, _parse_line(
$expr
)) ];
}
if
(
wantarray
) {
delete
$iter
{
$cxix
};
return
@{
delete
$entries
{
$cxix
}};
}
else
{
if
(
$iter
{
$cxix
} =
scalar
@{
$entries
{
$cxix
}}) {
return
shift
@{
$entries
{
$cxix
}};
}
else
{
delete
$iter
{
$cxix
};
delete
$entries
{
$cxix
};
return
undef
;
}
}
}
sub
_do_glob {
my
(
$cond
,
@expr
) =
@_
;
my
@glob
= ();
OUTER:
for
my
$expr
(
@expr
) {
next
OUTER
if
not
defined
$expr
;
next
OUTER
if
$expr
eq
''
;
my
@matched
= ();
my
@globdir
= ();
my
$head
=
'.'
;
my
$pathsep
=
'/'
;
my
$tail
;
if
(
$expr
=~ m/\A
" ((?:$q_char)*) "
\z/oxms) {
$expr
= $1;
if
(
$cond
eq
'd'
) {
if
(-d
$expr
) {
push
@glob
,
$expr
;
}
}
else
{
if
(-e
$expr
) {
push
@glob
,
$expr
;
}
}
next
OUTER;
}
if
($^O =~ /\A (?: MSWin32 | NetWare | symbian | dos ) \z/oxms) {
$expr
=~ s
}
if
((
$head
,
$tail
) = _parse_path(
$expr
,
$pathsep
)) {
if
(
$tail
eq
''
) {
push
@glob
,
$expr
;
next
OUTER;
}
if
(
$head
=~ m/ \A (?:
$q_char
)*? [*?] /oxms) {
if
(
@globdir
= _do_glob(
'd'
,
$head
)) {
push
@glob
, _do_glob(
$cond
,
map
{
"$_$pathsep$tail"
}
@globdir
);
next
OUTER;
}
}
if
(
$head
eq
''
or
$head
=~ m/\A [A-Za-z]: \z/oxms) {
$head
.=
$pathsep
;
}
$expr
=
$tail
;
}
if
(
$expr
!~ m/ \A (?:
$q_char
)*? [*?] /oxms) {
if
(
$head
eq
'.'
) {
$head
=
''
;
}
if
(
$head
ne
''
and (
$head
=~ m/ \G (
$q_char
) /oxmsg)[-1] ne
$pathsep
) {
$head
.=
$pathsep
;
}
$head
.=
$expr
;
if
(
$cond
eq
'd'
) {
if
(-d
$head
) {
push
@glob
,
$head
;
}
}
else
{
if
(-e
$head
) {
push
@glob
,
$head
;
}
}
next
OUTER;
}
opendir
(
*DIR
,
$head
) or
next
OUTER;
my
@leaf
=
readdir
DIR;
closedir
DIR;
if
(
$head
eq
'.'
) {
$head
=
''
;
}
if
(
$head
ne
''
and (
$head
=~ m/ \G (
$q_char
) /oxmsg)[-1] ne
$pathsep
) {
$head
.=
$pathsep
;
}
my
$pattern
=
''
;
while
(
$expr
=~ m/ \G (
$q_char
) /oxgc) {
my
$char
= $1;
if
(
$char
eq
'*'
) {
$pattern
.=
"(?:$your_char)*"
,
}
elsif
(
$char
eq
'?'
) {
$pattern
.=
"(?:$your_char)?"
,
}
elsif
((
my
$uc
=
uc
(
$char
)) ne
$char
) {
$pattern
.=
$uc
;
}
else
{
$pattern
.=
quotemeta
$char
;
}
}
my
$matchsub
=
sub
{
uc
(
$_
[0]) =~ m{\A
$pattern
\z}xms };
INNER:
for
my
$leaf
(
@leaf
) {
if
(
$leaf
eq
'.'
or
$leaf
eq
'..'
) {
next
INNER;
}
if
(
$cond
eq
'd'
and not -d
"$head$leaf"
) {
next
INNER;
}
if
(
&$matchsub
(
$leaf
)) {
push
@matched
,
"$head$leaf"
;
next
INNER;
}
if
(Eutf2::
index
(
$leaf
,
'.'
) == -1 and
CORE::
length
(
$leaf
) <= 8 and
Eutf2::
index
(
$pattern
,
'\\.'
) != -1
) {
if
(
&$matchsub
(
"$leaf."
)) {
push
@matched
,
"$head$leaf"
;
next
INNER;
}
}
}
if
(
@matched
) {
push
@glob
,
@matched
;
}
}
return
@glob
;
}
sub
_parse_line {
my
(
$line
) =
@_
;
$line
.=
' '
;
my
@piece
= ();
while
(
$line
=~ m{
" ( (?: (?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x00-\xFF]|[^"
] )* ) " \s+ |
( (?: (?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x00-\xFF]|[^"\s] )* ) \s+
}oxmsg
) {
push
@piece
,
defined
($1) ? $1 : $2;
}
return
@piece
;
}
sub
_parse_path {
my
(
$path
,
$pathsep
) =
@_
;
$path
.=
'/'
;
my
@subpath
= ();
while
(
$path
=~ m{
((?: (?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x00-\xFF]|[^/\\] )+?) [/\\] }oxmsg
) {
push
@subpath
, $1;
}
my
$tail
=
pop
@subpath
;
my
$head
=
join
$pathsep
,
@subpath
;
return
$head
,
$tail
;
}
sub
UTF2::
ord
(;$) {
local
$_
=
shift
if
@_
;
if
(m/\A (
$q_char
) /oxms) {
my
@ord
=
unpack
'C*'
, $1;
my
$ord
= 0;
while
(
my
$o
=
shift
@ord
) {
$ord
=
$ord
* 0x100 +
$o
;
}
return
$ord
;
}
else
{
return
CORE::
ord
$_
;
}
}
sub
UTF2::ord_() {
if
(m/\A (
$q_char
) /oxms) {
my
@ord
=
unpack
'C*'
, $1;
my
$ord
= 0;
while
(
my
$o
=
shift
@ord
) {
$ord
=
$ord
* 0x100 +
$o
;
}
return
$ord
;
}
else
{
return
CORE::
ord
$_
;
}
}
sub
UTF2::
reverse
(@) {
if
(
wantarray
) {
return
CORE::
reverse
@_
;
}
else
{
return
join
''
, CORE::
reverse
(
join
(
''
,
@_
) =~ m/\G (
$q_char
) /oxmsg);
}
}
sub
UTF2::
length
(;$) {
local
$_
=
shift
if
@_
;
local
@_
= m/\G (
$q_char
) /oxmsg;
return
scalar
@_
;
}
sub
UTF2::
substr
($$;$$) {
my
@char
=
$_
[0] =~ m/\G (
$q_char
) /oxmsg;
if
(
@_
== 4) {
my
(
undef
,
$offset
,
$length
,
$replacement
) =
@_
;
my
$substr
=
join
''
,
splice
(
@char
,
$offset
,
$length
,
$replacement
);
$_
[0] =
join
''
,
@char
;
return
$substr
;
}
elsif
(
@_
== 3) {
my
(
undef
,
$offset
,
$length
) =
@_
;
if
(
$length
== 0) {
return
''
;
}
if
(
$offset
>= 0) {
return
join
''
, (
@char
[
$offset
..
$#char
])[0 ..
$length
-1];
}
else
{
return
join
''
, (
@char
[(
$#char
+
$offset
+1) ..
$#char
])[0 ..
$length
-1];
}
}
else
{
my
(
undef
,
$offset
) =
@_
;
if
(
$offset
>= 0) {
return
join
''
,
@char
[
$offset
..
$#char
];
}
else
{
return
join
''
,
@char
[(
$#char
+
$offset
+1) ..
$#char
];
}
}
}
sub
UTF2::
index
($$;$) {
my
$index
;
if
(
@_
== 3) {
$index
= Eutf2::
index
(
$_
[0],
$_
[1], CORE::
length
(UTF2::
substr
(
$_
[0], 0,
$_
[2])));
}
else
{
$index
= Eutf2::
index
(
$_
[0],
$_
[1]);
}
if
(
$index
== -1) {
return
-1;
}
else
{
return
UTF2::
length
(CORE::
substr
$_
[0], 0,
$index
);
}
}
sub
UTF2::
rindex
($$;$) {
my
$rindex
;
if
(
@_
== 3) {
$rindex
= Eutf2::
rindex
(
$_
[0],
$_
[1], CORE::
length
(UTF2::
substr
(
$_
[0], 0,
$_
[2])));
}
else
{
$rindex
= Eutf2::
rindex
(
$_
[0],
$_
[1]);
}
if
(
$rindex
== -1) {
return
-1;
}
else
{
return
UTF2::
length
(CORE::
substr
$_
[0], 0,
$rindex
);
}
}
sub
carp(@) {
my
(
$package
,
$filename
,
$line
) =
caller
(1);
print
STDERR
"@_ at $filename line $line.\n"
;
}
sub
croak(@) {
my
(
$package
,
$filename
,
$line
) =
caller
(1);
print
STDERR
"@_ at $filename line $line.\n"
;
die
"\n"
;
}
sub
cluck(@) {
my
$i
= 0;
my
@cluck
= ();
while
(
my
(
$package
,
$filename
,
$line
,
$subroutine
) =
caller
(
$i
)) {
push
@cluck
,
"[$i] $filename($line) $package::$subroutine\n"
;
$i
++;
}
print
STDERR
reverse
@cluck
;
print
STDERR
"\n"
;
carp
@_
;
}
sub
confess(@) {
my
$i
= 0;
my
@confess
= ();
while
(
my
(
$package
,
$filename
,
$line
,
$subroutine
) =
caller
(
$i
)) {
push
@confess
,
"[$i] $filename($line) $package::$subroutine\n"
;
$i
++;
}
print
STDERR
reverse
@confess
;
print
STDERR
"\n"
;
croak
@_
;
}
1;