#!/usr/bin/perl
use utf8; # encoding="utf-8"
use strict;
no warnings;
my $NAME = [
qr/Aleksandar/,
qr/Aleksandr Guidrevitch/,
qr/Alex$/,
qr/Alexander Lourier/,
qr/Alex Noso/,
qr/Alex Prota/,
qr/Alexe[iy]/,
qr/Aleksey/,
qr/Anatoly/,
qr/Andre[iy]/,
qr/Andrew (?:Maltsev|Shitov|V\. Ma)/,
qr/Anton\b/,
qr/Artur Penttinen/,
qr/Boris Sukholitko/,
qr/Denis \w+[oe]v\b/,
qr/Denis S/,
qr/(?:Evdokimov|Arapov) Denis/,
qr/Dmitr[iy]/,
qr/Egor/,
qr/Eugen Konkov/,
qr/Eugen Sob/,
qr/Gregory (?:A|Be)/,
qr/Igor/,
qr/Ilya/,
qr/Ivan/,
qr/Kirill/,
qr/Konstantin/,
qr/Maxim\b/,
qr/Mishka/,
qr/Mons Anderson/,
qr/Nic?kolay/,
qr/Oleg/,
qr/Pavel/,
qr/Ruslan/,
qr/Serge[iy]/,
qr/Timur/,
qr/Vadim/,
qr/Victor (?:M|\w+[oe]v\b)/,
qr/Vladimir/,
qr/Vladislav/,
qr/Walery/,
qr/Yur[iy]/,
qr/Zaha/,
qr/Zoffix/,
qr/[а-я]+/,
];
print
map { @$_ }
grep { warn scalar @$_ }
[
# for module
map { sprintf "%-9s => '%s',\n", grep { defined } @$_{'pause', 'name', 'email'} }
sort { $a->{'pause'} cmp $b->{'pause'} }
# for list
#~ map { join("\t", grep { defined } @$_{'id', 'name', 'pause', 'author', 'email', 'url'}), "\n" }
# check email
#~ map { qq{"$_->{'pause'}" $_->{'email'}\n} }
#~ grep { $_->{'email'} && $_->{'email'} !~ /CENSORED/ && $_->{'email'} !~ /\.ru$/ }
# cpan author
grep { $_->{'author'} }
# not cpan author, have pause id
# grep {!$_->{'author'} }
# ru
grep { $_->{'pause'} eq 'STEALTHY' and $_->{'author'}=0; 1}
grep { $_->{'pause'} !~ /^SODASODA|CUB|DZHARIY|IHEFFNER|QUEVLAR|VTI|GUGU|EJS|GRUBER|EGORSH|MVUETS|BEROV|CYBER|NIKIP|TTOD|OMEGA|DRINCHEV|YAKWOO|PLCGI|MADZ|VMS|SHTATLAND|IVAN|ILYA|GREGORY|ERIC|DIMRUB|ANDREI|AXS|MOO|ICHLADIL|IZUT|IKETRIS|IVANS|ITUB|IVANWILLS|IVANWONG|PAVELH|INITDOTD|RUSSOZ$/ }
grep { $_->{'email'} !~ /ua$/ }
grep { $_->{'email'} !~ /bg$/ }
grep { $_->{'email'} !~ /by$/ }
grep { $_->{'email'} !~ /ws$/ }
grep { $_->{'email'} !~ /cz$/ }
# ua
# grep { $_->{'pause'} =~ /^CUB|VTI|GUGU|EJS|GRUBER|MVUETS|DARKNOS|DZHARIY$/ || $_->{'email'} =~ /ua$/ }
# cyrillic
grep {
my $name = $_->{'name'};
scalar grep { $name =~ /$_/ } @$NAME;
}
# no
# grep { $_->{pause} !~ /^BDULFER$/ }
# grep { $_->{pause} =~ /^KIRILLM|KJETIL|SJN|MRAMBERG|KRN|ANDREMAR|TRONDMM|ARNE|EIDOLON|NICOMEN|COSIMO|AFF|ESPIZO$/ || $_->{'email'} =~ /no$/ }
map {
my($url, $name ) = $_->[ 1] =~ /([^<]+)/; $name ||= $_->[ 1]; $name =~ s/\s+$//; $name =~ s/&[^;]+;//sg;
my($url2, $pause) = $_->[-1] =~ /([^<]+)/; $pause ||= $_->[-1];
+{
'id' => $_->[0],
'name' => $name,
'url' => $url,
'email' => $_->[2],
'pause' => $pause,
'author' => $url2 ? 1 : 0,
};
}
map {
[m{^ (\d+)\. \s ([^<]+ | | \s) .*? \s*} }
grep { utf8::decode($_);1 }
map { $_->is_success ? $_->content =~ m{Archives(.*)