#!/usr/bin/perl -w package Mail::Miner::Recogniser::Address; $Mail::Miner::recognisers{"".__PACKAGE__} = { title => "Physical Addresses", help => "Match messages which contain an address", keyword => "address", }; my $us_state = qr/(?:A[LKSZREAEEEP]| C[AOT]|D[EC]|F[ML]|G[AU]|HI|I[DLNA]|K[SY]|LA|M[EHDAINSOTP]|N[EVHJMYCD]| O[HKR]|P[WAR]|RI|S[CD]|T[NX]|UT|V[TIA]|W[AVIY])/x; my $uk_post_town = qr/(?:AL|B[ABDHLNRST]?|C[ABFHMORTVW]|D[ADEGHLNTY]|E[CHNX]?|F[KY]| G[LUY]?|H[ADGPRSUX]|I[GMPV]|JE|K[ATWY]|L[ADELNSU]?|M[EKL]?|N[EGNPRW]?| O[LX]|P[ORAEHL]|R[GHM]|S[AEGKLMNOPRSTWY]?|T[ADFNQRSW]|UB|W[ACDFNRSV]?| YO|ZE)/x; my $uk_postcode = qr/$uk_post_town\d{1,3}[ \t]+\d{1,2}[A-Z][A-Z]/; my $us_zipcode = qr/$us_state[ \t]+\d{5}/; sub process { my ($class, %hash) = @_; my @lines = split /\n/, $hash{getbody}->(); my @found; my $last =0; for (0..$#lines) { if ($lines[$_] =~ /(.*\b($uk_postcode|$us_zipcode)\b)/) { if ($_ - $last > 10) { $last = $_-10 } # Max of 10 lines my $address = join "\n", @lines[$last+1..$_]; # Trim whitespace and quoters $address =~ s/^\s* (?:[A-Z][A-Z]>)? # SUPERCITE [\s>:]+//msgox; push @found, $address; } elsif ($lines[$_] !~ /\w/) { $last = $_; } } return @found; } 1;