# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl

# Declare the namespace that the POD (NAME / SYNOPSIS) advertises.  Without
# this statement everything below is compiled into package main, so the
# Exporter configuration would never be consulted by
# "use Lingua::PT::Gender qw/ptbr_gender/".
package Lingua::PT::Gender;

use 5.006;
use strict;
use warnings;

require Exporter;

# Inherit import() from Exporter.
our @ISA = qw(Exporter);

# Nothing is exported by default; ptbr_gender is available on request,
# either by name or through the ':all' tag.
our @EXPORT      = ();
our @EXPORT_OK   = qw(ptbr_gender);
our %EXPORT_TAGS = ( 'all' => [@EXPORT_OK] );

our $VERSION = '1.01';

=head1 NAME

Lingua::PT::Gender - Decides if a Portuguese proper name is male or female

=head1 SYNOPSIS

  use Lingua::PT::Gender qw/ptbr_gender/;
  
  $result = ptbr_gender("Marco Carnut");
  # $result now holds 1 meaning 'male'
  
  $result = ptbr_gender("Ana Paula");
  # $result now holds 0 meaning 'female'

=head1 DESCRIPTION

This module provides a routine to decide whether a Portuguese name is male
or female. The algorithm examines a table of suffixes to determine this.

The table was computed using a recursive space subdivision algorithm operating
on a database of about 60,000 proper names.

Typical accuracy is greater than 99%. This makes it useful to find enrollment
errors in databases.

=head2 ptbr_gender

This is the only function in this module. It returns 0 for female or 1 for
male, or undef when the first word ends in a character the suffix table has
no rule for (for example "q" or a digit). Comparisons are case insensitive.
It expects non-accented letters; it is your responsibility to strip accents
beforehand. The routine considers only the first name (word) in the string;
all others are ignored.

A simple filter that gets names from the standard input and prefixes them
with M or F accordingly:

   #!/usr/bin/perl
   
   use Lingua::PT::Gender qw/ptbr_gender/; 

   while (<>)
   {
       print ptbr_gender($_) ? "M" : "F";
       print " $_";
   }
   
=head1 LICENSE

   GPL2 - http://www.gnu.org/licenses/gpl.txt

=head1 AUTHOR

   Marco "Kiko" Carnut <kiko at tempest.com.br>
   http://www.postcogito.org/

=cut

# Suffix rule table used by ptbr_gender().
#
# Each rule is [ $ending, $default, $exceptions ]:
#   $ending     - pattern matching the final letter of the first name;
#   $default    - gender for names with that final letter (0 female, 1 male);
#   $exceptions - optional pattern of suffixes (or whole names, when anchored
#                 with ^) that take the opposite gender.
#
# Rules are tried in order and the first matching $ending decides.  The table
# is built lazily on the first call and cached afterwards.
{
    my $rules;

    sub _ptbr_gender_rules
    {
        return $rules if $rules;

        $rules = [
            [ qr/a$/i, 0, qr/
                 wilba$|rba$|vica$|milca$|meida$|randa$
                |uda$|rrea$|afa$|^ha$|cha$|oha$|apha$
                |natha$|^elia$|rdelia$|remia$|aja$
                |rja$|aka$|kka$|^ala$|gla$|tila$|vila$
                |cola$|orla$|nama$|yama$|inima$|jalma$
                |nma$|urma$|zuma$|gna$|tanna$|pna$
                |moa$|jara$|tara$|guara$|beira$|veira$
                |kira$|uira$|pra$|jura$|mura$|tura$
                |asa$|assa$|ussa$|^iata$|onata$|irata$
                |leta$|preta$|jota$|ista$|aua$|dua$
                |hua$|qua$|ava$|dva$|^iva$|silva$|ova$
                |rva$|wa$|naya$|ouza$
            /xi ],
            [ qr/b$/i, 1, qr/inadab$/i ],
            [ qr/c$/i, 1, qr/lic$|tic$/i ],
            [ qr/d$/i, 1, qr/edad$|rid$/i ],
            [ qr/e$/i, 0, qr/
                 dae$|jae$|kae$|oabe$|ube$|lace$|dece$
                |felice$|urice$|nce$|bruce$|dade$|bede$
                |^ide$|^aide$|taide$|cide$|alide$|vide$
                |alde$|hilde$|asenilde$|nde$|ode$|lee$
                |^ge$|ege$|oge$|rge$|uge$|phe$|bie$
                |elie$|llie$|nie$|je$|eke$|ike$|olke$
                |nke$|oke$|ske$|uke$|tale$|uale$|vale$
                |cle$|rdele$|gele$|tiele$|nele$|ssele$
                |uele$|hle$|tabile$|lile$|rile$|delle$
                |ole$|yle$|ame$|aeme$|deme$|ime$|lme$
                |rme$|sme$|ume$|yme$|phane$|nane$|ivane$
                |alvane$|elvane$|gilvane$|ovane$|dene$
                |ociene$|tiene$|gilene$|uslene$|^rene$
                |vaine$|waine$|aldine$|udine$|mine$
                |nine$|oine$|rtine$|vanne$|renne$|hnne$
                |ionne$|cone$|done$|eone$|fone$|ecione$
                |alcione$|edione$|hione$|jone$|rone$
                |tone$|rne$|une$|ioe$|noe$|epe$|ipe$
                |ope$|ppe$|ype$|sare$|bre$|dre$|bere$
                |dere$|fre$|aire$|hire$|ore$|rre$|tre$
                |dse$|ese$|geise$|wilse$|jose$|rse$
                |esse$|usse$|use$|aete$|waldete$|iodete$
                |sdete$|aiete$|nisete$|ezete$|nizete$
                |dedite$|uite$|lte$|ante$|ente$|arte$
                |laerte$|herte$|ierte$|reste$|aue$
                |gue$|oue$|aque$|eque$|aique$|inique$
                |rique$|lque$|oque$|rque$|esue$|osue$
                |ozue$|tave$|ive$|ove$|we$|ye$|^ze$
                |aze$|eze$|uze$
            /xi ],
            [ qr/f$/i, 1 ],
            [ qr/g$/i, 1, qr/eig$|heng$|mping$|bong$|jung$/i ],
            [ qr/h$/i, 1, qr/
                 kah$|nah$|rah$|sh$|beth$|reth$|seth$
                |lizeth$|rizeth$|^edith$|udith$|ruth$
            /xi ],
            [ qr/i$/i, 1, qr/
                 elai$|anai$|onai$|abi$|djaci$|glaci$
                |maraci$|^iraci$|diraci$|loraci$|ildeci$
                |^neci$|aici$|arici$|^elci$|nci$|oci$
                |uci$|kadi$|leidi$|ridi$|hudi$|hirlei$
                |sirlei$|^mei$|rinei$|ahi$|^ji$|iki$
                |isuki$|^yuki$|gali$|rali$|ngeli$|ieli$
                |keli$|leli$|neli$|seli$|ueli$|veli$
                |zeli$|ili$|helli$|kelli$|arli$|wanderli$
                |hami$|iemi$|oemi$|romi$|tmi$|ssumi$
                |yumi$|zumi$|bani$|iani$|irani$|sani$
                |tani$|luani$|^vani$|^ivani$|ilvani$
                |yani$|^eni$|ceni$|geni$|leni$|ureni$
                |^oseni$|veni$|zeni$|cini$|eini$|lini$
                |jenni$|moni$|uni$|mari$|veri$|hri$
                |aori$|ayuri$|lsi$|rsi$|gessi$|roti$
                |sti$|retti$|uetti$|aui$|iavi$|^zi$
                |zazi$|suzi$
            /xi ],
            [ qr/j$/i, 1 ],
            [ qr/k$/i, 1, qr/nak$|lk$/i ],
            [ qr/l$/i, 1, qr/
                 mal$|^bel$|mabel$|rabel$|sabel$|zabel$
                |achel$|thel$|quel$|gail$|lenil$|mell$
                |ol$
            /xi ],
            [ qr/m$/i, 1, qr/
                 liliam$|riam$|viam$|miram$|eem$|uelem$
                |mem$|rem$
            /xi ],
            [ qr/n$/i, 1, qr/
                 lilian$|lillian$|marian$|irian$|yrian$
                |ivian$|elan$|rilan$|usan$|nivan$|arivan$
                |iryan$|uzan$|ohen$|cken$|elen$|llen$
                |men$|aren$|sten$|rlein$|kelin$|velin$
                |smin$|rin$|istin$|rstin$|^ann$|ynn$
                |haron$|kun$|sun$|yn$
            /xi ],
            [ qr/o$/i, 1, qr/
                 eicao$|eco$|mico$|tico$|^do$|^ho$
                |ocio$|ako$|eko$|keiko$|seiko$|chiko$
                |shiko$|akiko$|ukiko$|miko$|riko$|tiko$
                |oko$|ruko$|suko$|yuko$|izuko$|uelo$
                |stano$|maurino$|orro$|jeto$|mento$
                |luo$
            /xi ],
            [ qr/p$/i, 1, qr/yip$/i ],
            [ qr/r$/i, 1, qr/
                 lar$|lamar$|zamar$|ycimar$|idimar$
                |eudimar$|olimar$|lsimar$|lzimar$|erismar$
                |edinar$|iffer$|ifer$|ather$|sther$
                |esper$|^ester$|madair$|eclair$|olair$
                |^nair$|glacir$|^nadir$|ledir$|^vanir$
                |^evanir$|^cenir$|elenir$|zenir$|ionir$
                |fior$|eonor$|racyr$
            /xi ],
            [ qr/s$/i, 1, qr/
                 unidas$|katias$|rces$|cedes$|oides$
                |aildes$|derdes$|urdes$|leudes$|iudes$
                |irges$|lkes$|geles$|elenes$|gnes$
                |^ines$|aines$|^dines$|rines$|pes$
                |deres$|^mires$|amires$|ores$|neves$
                |hais$|lais$|tais$|adis$|alis$|^elis$
                |ilis$|llis$|ylis$|ldenis$|annis$|ois$
                |aris$|^cris$|^iris$|miris$|siris$
                |doris$|yris$|isis$|rtis$|zis$|heiros$
                |dys$|inys$|rys$
            /xi ],
            [ qr/t$/i, 1, qr/bet$|ret$|^edit$|git$|est$|nett$|itt$/i ],
            [ qr/u$/i, 1, qr/^du$|alu$|^miharu$|^su$/i ],
            [ qr/v$/i, 1 ],
            [ qr/w$/i, 1 ],
            [ qr/x$/i, 1 ],
            [ qr/y$/i, 1, qr/
                 may$|anay$|ionay$|lacy$|^aracy$|^iracy$
                |doracy$|vacy$|aricy$|oalcy$|ncy$|nercy$
                |ucy$|lady$|hedy$|hirley$|raney$|gy$
                |ahy$|rothy$|taly$|aely$|ucely$|gely$
                |kely$|nely$|sely$|uely$|vely$|zely$
                |aily$|rily$|elly$|marly$|mony$|tamy$|iany$
                |irany$|sany$|uany$|lvany$|wany$|geny$
                |leny$|ueny$|anny$|mary$|imery$|smery$
                |iry$|rory$|isy$|osy$|usy$|ty$
            /xi ],
            [ qr/z$/i, 1, qr/^inez$|rinez$|derez$|liz$|riz$|uz$/i ],
        ];

        return $rules;
    }
}

# ptbr_gender($name)
#
# Guesses the gender of a Portuguese proper name from the suffix of its
# first word.
#
# Parameters:
#   $name - string holding the name; only the first whitespace-separated
#           word is examined, case-insensitively.
#
# Returns exactly one scalar:
#   0     - female
#   1     - male
#   undef - $name is undefined, empty or all whitespace, or the first word
#           ends in a character no rule covers (e.g. "q", a digit).
sub ptbr_gender
{
    my ($name) = @_;

    # Returned as-is (undef) whenever no rule applies, so callers in list
    # context always receive a single element.
    my $gender;
    return $gender unless defined $name;

    # split ' ' skips leading whitespace; splitting on /\s+/ would produce
    # an empty first field for "  Ana" and no rule could ever match it.
    my ($first) = split ' ', $name;
    return $gender unless defined $first;

    for my $rule ( @{ _ptbr_gender_rules() } ) {
        my ( $ending, $default, $exceptions ) = @$rule;
        next unless $first =~ $ending;

        $gender = $default;
        $gender = 1 - $default
            if defined $exceptions && $first =~ $exceptions;
        last;
    }

    return $gender;
}

1;