package String::CodiceFiscale; $String::CodiceFiscale::VERSION = '0.01'; use strict; no utf8; no locale; use base qw(Class::Data::Inheritable); use Time::Piece; use Carp; our %CRC = ( A => [0, 1], B => [1, 0], C => [2, 5], D => [3, 7], E => [4, 9], F => [5, 13], G => [6, 15], H => [7, 17], I => [8, 19], J => [9, 21], K => [10, 2], L => [11, 4], M => [12, 18], N => [13, 20], O => [14, 11], P => [15, 3], Q => [16, 6], R => [17, 8], S => [18, 12], T => [19, 14], U => [20, 16], V => [21, 10], W => [22, 22], X => [23, 25], Y => [24, 24], Z => [25, 23], 0 => [0, 1], 1 => [1, 0], 2 => [2, 5], 3 => [3, 7], 4 => [4, 9], 5 => [5, 13], 6 => [6, 15], 7 => [7, 17], 8 => [8, 19], 9 => [9, 21], ); __PACKAGE__->mk_classdata('ERROR'); our ($MONTHS, @MONTHS, %MONTHS); #code to/from month @MONTHS[1..12] = qw(A B C D E H L M P R S T); @MONTHS{@MONTHS[1..12]} = 1..12; $MONTHS = join '', @MONTHS[1..12]; our ($XNUMS, @XNUMS, %XNUMS); #coded numbers for rare collision cases @XNUMS = qw(L M N P Q R S T U V); @XNUMS{@XNUMS} = 0..9; #not used anymore, but here "just in case" $XNUMS = join '', @XNUMS; our $CONSONANTS = 'BCDFGHJKLMNPQRSTVWXYZ'; our $VOWELS = 'AEIOU'; our $RE_cf = qr/ ^ #start ([A-Z]{3}) #surname coded ([A-Z]{3}) #firstname coded ([\d$XNUMS]{2}) #year ([$MONTHS]) #month coded ([\d$XNUMS]{2}) #day and sex ([A-Z][\d$XNUMS]{3}) #birthplace coded ([A-Z]) #crc $ #end /xo; our $RE_nc = qr/^[$CONSONANTS]*[$VOWELS]*X*$/xo; our %OPTS = map {$_ => 1} qw( sn sn_c fn fn_c date year year_c month month_c day day_c sex bp bp_c ); sub new { my $class = shift; my $self = bless {}, $class; while (my ($k, $v) = splice(@_, 0, 2)) { $self->_croak(qq(Not such an options "$k")) unless $OPTS{$k}; $self->$k($v); } return $self; } sub sn { my $self = shift; my ($sn) = @_; if (defined $sn) { $sn = uc($sn); $self->{sn} = $sn; $self->{sn_c} = undef; $self->{sn_re} = undef; } return $sn; } sub sn_c { my $self = shift; my ($sn_c) = @_; if (defined $sn_c) { $sn_c = uc($sn_c); unless ($sn_c =~ /$RE_nc/) { $self->error('Coded surname cannot contain ' . 'vowels followed by consonants'); return; } unless (length($sn_c) == 3) { $self->error('Coded surname must be 3 chars in length'); return; } $self->{sn_c} = $sn_c; $self->{sn} = undef; $self->{sn_re} = undef; } if (defined $self->{sn} and not defined $self->{sn_c}) { my $temp = ''; OUTER: { while ($self->{sn} =~ /([$CONSONANTS])/go) { $temp .= $1; last OUTER if length $temp >= 3; } while ($self->{sn} =~ /([$VOWELS])/go) { $temp .= $1; last OUTER if length $temp >= 3; } while (length $temp < 3) { $temp .= 'X'; } } $self->{sn_c} = $temp; } return $self->{sn_c}; } sub sn_re { my $self = shift; return $self->_n_re('sn_c'); } sub sn_match { my $self = shift; my ($tm) = @_; return unless defined $tm; $tm = uc $tm; $self->_fix_name($tm); if (defined(my $sn = $self->sn)) { $self->_fix_name($sn); return $tm eq $self->sn; } if (defined $self->sn_c) { return $tm =~ $self->sn_re; } return; } sub fn { my $self = shift; my ($fn) = @_; if (defined $fn) { $fn = uc($fn); $self->{fn} = $fn; $self->{fn_c} = undef; $self->{fn_re} = undef; } return $fn; } sub fn_c { my $self = shift; my ($fn_c) = @_; if (defined $fn_c) { $fn_c = uc($fn_c); unless ($fn_c =~ /$RE_nc/) { $self->error('Coded name cannot contain ' . 'vowels followed by consonants'); return; } unless (length($fn_c) == 3) { $self->error('Coded name must be 3 chars in length'); return; } $self->{fn_c} = $fn_c; $self->{fn} = undef; $self->{fn_re} = undef; } if (defined $self->{fn} and not defined $self->{fn_c}) { my $temp = ''; my $skip = $self->_count_consonants($self->{fn}) > 3; OUTER: { while ($self->{fn} =~ /([$CONSONANTS])/go) { if ($skip and length($temp) == 1) { $skip = 0; next; } $temp .= $1; last OUTER if length $temp >= 3; } while ($self->{fn} =~ /([$VOWELS])/go) { $temp .= $1; last OUTER if length $temp >= 3; } while (length $temp < 3) { $temp .= 'X'; } } $self->{fn_c} = $temp; } return $self->{fn_c}; } sub fn_re { my $self = shift; return $self->_n_re('fn_c'); } sub fn_match { my $self = shift; my ($tm) = @_; return unless defined $tm; $tm = uc $tm; $self->_fix_name($tm); if (defined(my $fn = $self->fn)) { $self->_fix_name($fn); return $tm eq $fn; } if (defined $self->fn_c) { return $tm =~ $self->fn_re; } return; } sub date { my $self = shift; my ($date) = @_; if (defined $date) { my $t; eval { $t = Time::Piece->strptime($date, '%Y-%m-%d') }; if ($@) { $self->error("Invalid date"); return; } my %date = ( year => $t->year, month => $t->mon, day => $t->mday, ); for (qw(year month day)) { unless ( $self->$_($date{$_}) ) { $self->error("Couldn't parse $_"); return; } } } else { my %date; for (qw(year month day)) { $date{$_} = $self->$_; unless (defined $date{$_}) { $self->error("Couldn't retrieve $_"); return; } } return sprintf("%04d-%02d-%02d", @date{qw(year month day)}); } return $date; } sub year { my $self = shift; my ($y) = @_; if (defined $y) { unless ($y =~ /^\d+$/) { $self->error('A year should be an unsigned integer'); return; } $self->{year} = $y; $self->{year_c} = undef; } if (not defined $self->{year} and defined $self->{year_c}) { my $year = $self->_xnums($self->{year_c}); my $cy = (localtime(time))[5] % 100; # this is making a guess if ($year > $cy) { $self->{year} = sprintf "19%02d", $year; } else { $self->{year} = sprintf "20%02d", $year; } } return $self->{year}; } sub year_c { my $self = shift; my ($ycx) = @_; if (defined $ycx) { my $yc = $self->_xnums($ycx); unless ($yc =~ /^\d\d$/) { $self->error('A year in Codice Fiscale is 2 digit long'); return; } $self->{year_c} = $ycx; $self->{year} = undef; } if (not defined $self->{year_c} and defined $self->{year}) { $self->{year_c} = $self->{year} % 100; } return $self->{year_c}; } sub month { my $self = shift; my ($m) = @_; if (defined $m) { unless ($m =~ /^\d+$/ and $m >= 1 and $m <= 12) { $self->error('Month must be numeric and between 1 and 12'); return; } $self->{month} = $m; $self->{month_c} = undef; } if (not defined $self->{month} and defined $self->{month_c}) { $self->{month} = $MONTHS{$self->{month_c}}; } return $self->{month}; } sub month_c { my $self = shift; my ($mc) = @_; if (defined $mc) { unless ($mc =~ /^[$MONTHS]$/o) { $self->error('Month not correctly encoded'); return; } $self->{month_c} = $mc; $self->{month} = undef; } if (not defined $self->{month_c} and defined $self->{month}) { $self->{month_c} = $MONTHS[$self->{month}]; } return $self->{month_c}; } sub day { my $self = shift; my ($d) = @_; if (defined $d) { unless ($d =~ /^\d+$/ and 1 <= $d and $d <= 31) { $self->error('Day is out of range'); return; } $self->{day} = $d; $self->{day_c} = undef; } if (not defined $self->{day} and defined $self->{day_c}) { my $dayx = $self->_xnums($self->{day_c}); $self->{day} = $dayx > 40 ? $dayx - 40 : $dayx; } return $self->{day}; } sub day_c { my $self = shift; my ($dcx) = @_; if (defined $dcx) { my $dc = $self->_xnums($dcx); unless ($dc =~ /^\d+$/) { $self->error('Invalid coding of day'); return; } unless ($dc > 0 and not ($dc > 31 and $dc < 41) and $dc <= 71) { $self->error('Day out of range'); return; } $self->{day_c} = $dcx; $self->{day} = undef; $self->{sex} = undef; } if (not defined $self->{day_c} and defined $self->{day} and defined $self->{sex}) { $self->{day_c} = $self->{day}; $self->{day_c} += 40 if $self->{sex} eq 'F'; } return $self->{day_c}; } sub sex { my $self = shift; my ($sex) = @_; if (defined $sex) { unless ($sex =~ /^[MF]$/i) { $self->error('Sex can be either "M" or "F"'); return; } $self->{sex} = $sex; $self->{day_c} = undef; } if (not defined $self->{sex} and defined $self->{day_c}) { my $dayx = $self->_xnums($self->{day_c}); $self->{sex} = $dayx > 40 ? 'F' : 'M'; } return $self->{sex}; } sub bp { my $self = shift; my ($bp) = @_; if (defined $bp) { unless ($bp =~ /^[A-Z]\d\d\d$/) { # we could improve this $self->error('Invalid birthplace code'); return; } $self->{bp} = $bp; $self->{bp_c} = undef; } if (not defined $self->{bp} and defined $self->{bp_c}) { my $bpc = $self->{bp_c}; substr($bpc, 1) = $self->_xnums(substr($bpc, 1)); $self->{bp} = $bpc; } return $self->{bp}; } sub bp_c { my $self = shift; my ($bpcx) = @_; if (defined $bpcx) { my $bpc = $bpcx; substr($bpc, 1) = $self->_xnums(substr($bpc, 1)); unless ($bpc =~ /^[A-Z]\d\d\d$/) { # we could improve this $self->error('Invalid birthplace code'); return; } $self->{bp_c} = $bpcx; $self->{bp} = undef; } if (not defined $self->{bp_c} and defined $self->{bp}) { $self->{bp_c} = $self->{bp}; } return $self->{bp_c}; } sub bd_c { my $self = shift; my $bdc = ''; for (qw(year_c month_c day_c)) { my $t = $self->$_; unless (defined $t) { $self->error("Could not produce $_: some data is missing"); return; } $bdc .= $t; } return $bdc; } sub cf { my $self = shift; return $self->_crc(1); } sub crc { my $self = shift; return $self->_crc(0); } sub cf_nocrc { my $self = shift; my $cf = ''; for (qw(sn_c fn_c bd_c bp_c)) { my $t = $self->$_; unless (defined $t) { $self->error("Could not produce $_: some data is missing"); return; } $cf .= $t; } my $nums = substr($cf, 6, 2) . substr($cf, 9, 2) . substr($cf, 12, 3); unless ($self->_xnums($nums)) { $self->error('Invalid special characters'); return; } return $cf; } sub _crc { my $self = shift; my ($cf_out) = @_; my $cf = $self->cf_nocrc; unless ($cf) { $self->error("Cannot produce a Codice Fiscale: missing data"); return; } my $count = 0; for (my $i = 0; $i <= 14; $i++) { $count += $CRC{substr($cf, $i, 1)}[($i + 1) % 2]; } $count %= 26; return ($cf_out ? $cf : '') . chr(65 + $count); } sub parse { my $proto = shift; my ($cf) = @_; $cf = uc $cf; unless (length($cf) == 16) { $proto->error('A valid Codice Fiscale must be exactly 16 chars long'); return; } my ($sn, $fn, $year, $month, $dayx, $born, $crc) = $cf =~ /$RE_cf/; unless ($crc) { $proto->error('Cannot parse: invalid format'); return; } my $obj = $proto->new( sn_c => $sn, fn_c => $fn, year_c => $year, month_c => $month, day_c => $dayx, bp_c => $born, ); unless ($crc eq $obj->crc) { $proto->error('Invalid control character'); return; } return $obj; } sub validate { my $proto = shift; my ($cf) = @_; my $obj = $proto->parse($cf); return 1 if $obj; return; } sub error { my $proto = shift; my ($err) = @_; if (ref $proto) { $proto->{_err} = $err if defined $err; return $proto->{_err}; } $proto->ERROR($err) if defined $err; return $proto->ERROR; } { my $tr_xnums = eval "sub {\$_[0] =~ tr/$XNUMS/0123456789/}"; sub _xnums { my $self = shift; my ($nums) = @_; return unless $nums =~ /^\d*[$XNUMS]*$/o; $tr_xnums->($nums); return $nums; } } sub _n_re { my $self = shift; my ($method) = @_; (my $attr = $method) =~ s/_c$/_re/; return $self->{$attr} if defined $self->{$attr}; my $nc = $self->$method; unless ($nc) { $self->error('There is no coded ' . ($method eq 'sn_c' ? 'sur' : '') . 'name set'); return; } my ($c, $v, $x) = $nc =~ /^([$CONSONANTS]*)([$VOWELS]*)(X*)$/o; my $pat; if (3 == length $c) { my @c = split('', $c); if ($method eq 'fn_c') { $pat = qr/^(?: [$VOWELS]* $c[0] [$VOWELS]* [$CONSONANTS] [$VOWELS]* $c[1] [$VOWELS]* $c[2] [A-Z]* | [$VOWELS]* $c[0] [$VOWELS]* $c[1] [$VOWELS]* $c[2] [$VOWELS]* )$/xi; } else { $pat = qr/^ [$VOWELS]* $c[0] [$VOWELS]* $c[1] [$VOWELS]* $c[2] [A-Z]* $/xi; } } elsif (2 == length($c) and 1 == length($v)) { my @c = split('', $c); $pat = qr/^(?: $v [$VOWELS]* $c[0] [$VOWELS]* $c[1] [$VOWELS]* | $c[0] $v [$VOWELS]* $c[1] [$VOWELS]* | $c[0] $c[1] $v [$VOWELS]* )$/xi; } elsif (1 == length($c) and 2 == length($v)) { my @v = split('', $v); $pat = qr/^(?: $c $v[0] $v[1] [$VOWELS]* | $v[0] $c $v[1] [$VOWELS]* | $v[0] $v[1] [$VOWELS]* $c [$VOWELS]* )$/xi; } elsif (3 == length $v) { $pat = qr/^ $v [$VOWELS]* $/xi; } elsif (1 == length $x) { if (1 == length($c)) { $pat = qr/^(?: $c $v | $v $c )$/xi; } else { $pat = qr/^ $v $/xi; } } elsif (2 == length $x) { $pat = qr/^ $v $/xi; } else { $pat = qr/^ .* $/xi; } return $self->{$attr} = $pat; } sub _fix_name { $_[1] =~ tr/àÀèéÈÉìÌòÒùÙ/AAEEEEIIOOUU/; $_[1] =~ tr/a-zA-Z//cd; } { my $count_consonants = eval "sub {\$_[0] =~ tr/$CONSONANTS/$CONSONANTS/}"; sub _count_consonants { return $count_consonants->($_[1]) } } sub _croak { my $self = shift; confess @_; } 1; __END__ =head1 NAME String::CodiceFiscale - convert personal data into italian Codice Fiscale =head1 SYNOPSIS use String::CodiceFiscale; $obj = String::CodiceFiscale->new( sn => 'Wall', # surname fn => 'Larry', # first name date => '1987-12-18', # Perl's birthday sex => 'M', # M or F bp => 'Z404', # birthplace, Codice Catastale code ); print $obj->cf, "\n"; # prints Codice Fiscale # and the other way around $obj = String::CodiceFiscale->parse('WLLLRY87T18Z404B'); unless ($obj) { # check for errors print "We have an error: " . String::CodiceFiscale->error; } print "This " . ($obj->sex eq 'M' ? 'guy' : 'chick') . " was born on " . $obj->date . " (unless he's more than 100)\n"; for (qw(Wallace Wall Weeler Awalala)) { print "$_\t could be his surname\n" if $obj->sn_match($_); } for (qw(Ilary Elryk Larry Kilroy Leroy)) { print "$_\t could be his first name\n" if $obj->fn_match($_); } =head1 DESCRIPTION String::CodiceFiscale might help you in the tricky task of verifying and/or producing a Codice Fiscale. It also gives you some utilities to "reverse engineer" a given Codice Fiscale and find out what personal data could have produce it. For more info about the Codice Fiscale format see the Appendix. Please note that [] "square brackets" in the following documentation will mark optional parameters and not anonymous array references. =head1 CLASS METHODS =over 4 =item new([%PARAMS]) Creates a new object. It receives parameters in hash fashion and will use every key of the hash as an object method called with the respective value. See below for possible methods. =item parse(CF) Creates a new object from parsing the given STRING as a Codice Fiscale. It won't return a valid object if the given Codice Fiscale won't pass some validation checks. =item validate(CF) Utility method. It will return a true value if STRING is a valid Codice Fiscale. Unless it will return a false value. =item error() Returns a string containing a descriptive error of what went wrong during the last failed call to a class method. =back =head1 OBJECT METHODS All get/set methods give you back the actual value of the attribute. If you provide a STRING they will try to set the attribute after some validation checks. If these checks fail the method will return a false value. Otherwise it will return the value you provided. =head2 GET/SET METHODS =over 4 =item sn([SURNAME]) Get/set method to retrieve or set the surname. =item fn([FIRST_NAME]) Get/set method to retrieve or set the first name. =item date([YYYY-MM-DD]) Get/set the date of birth. It can parse only dates provided in the ISO 8601 format (YYYY-MM-DD). The year could have the same problems discussed in the year() method. =item year([YEAR]) Get/set method for year. Please note that Codice Fiscale code HAS the Millenium Bug. So if you're asking for a year after parsing a codice fiscale what you will get will be a guess about what the year of birth is: this could be wrong for people older than 100. =item month([MONTH]) Get/set method to retrieve or set the month. =item day([DAY_OF_MONTH]) Get/set method to retrieve or set the day of month. =item sex([SEX]) Get/set method for sex. Accept "M" for male and "F" for female. =item bp([BIRTH_PLACE]) Get/set method for birthplace. The birthplace must be already encoded in the codice catastale form and match /^[A-Z]\d\d\d$/ . No lookup of city names is provided yet. =back =head2 ENCODING METHODS =over 4 =item cf Try to give you a valid codice fiscale. It will return a false value if some data is missing. Note how the generated codice fiscale has no warranty to be unique. =item crc Gives back just the control character. Return a false value on failure. =back =head2 REVERSE ENGINEERING METHODS =over 4 =item sn_match(STRING) Matches if STRING could be the surname that was used to generate the codice fiscale previously acquired through the parse() method. Please beware that there are infinite surnames that could produce the same coding in codice fiscale. =item fn_match(STRING) Matches if STRING could be the first name that was used to generate the codice fiscale previously acquired through the parse() method. See sn_match() for more info. =back =head1 APPENDIX Yet to be written. It would likely contain more info and caveats about the codice fiscale algorithm. =head1 TO DO - Perfect the error handling - Write more documentation and clear up obscure points - Create alias for methods whose names are less than obvious - Italian documentation and italian aliases =head1 AUTHOR Giulio Motta, Egiulienk@cpan.orgE =head1 COPYRIGHT AND LICENSE Copyright 2003 by Giulio Motta This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut