package Regexp::Common::profanity_us;

use strict;
local $^W = 1;

use Data::Dumper;
use Regexp::Common qw /pattern clean no_defaults/;

use vars qw /$VERSION/;
$VERSION = '2.2';

# Sort comparator that orders strings by descending length.
# It is not called anywhere in this part of the file.
sub longest_first { length($b) <=> length($a) }

# Word templates read from the __DATA__ section below. A '-' inside a
# template is a placeholder that the pattern's create routine expands into a
# bounded run of arbitrary characters. The list is never modified after
# loading, so every pattern build starts from the raw templates.
my @profanity;

# Load the templates: stop at the __END__ marker, skip comment lines and
# blank lines, and strip surrounding whitespace from everything else.
# NOTE(review): the line breaks of the __DATA__ section were reconstructed;
# confirm that "crotch" and "pussies" are entries and not comment text.
while (<DATA>) {
    last if /__END__/;
    # warn $_;
    next if /^#/;
    next if /^\s*$/;
    s/^\s+//;
    s/\s+$//;
    push @profanity, $_;
}

# Registers $RE{profanity}{us}{normal}{label}.
#
# Flags:
#   -dist  maximum number of arbitrary characters allowed wherever a
#          template contains '-' (default 7).
#
# The create routine returns the pattern source as a string: all templates
# joined with '|' and wrapped in Regexp::Common's (?k:...) keep marker.
pattern name   => [qw (profanity us normal label -dist=7)],
        create => sub {
            my ($self, $flags) = @_;

            my $word_width = $flags->{-dist};
            my $any_char   = ".{0,$word_width}";

            # Substitute on a copy of each template. Running s/// directly on
            # $_ inside map would rewrite @profanity itself, so the first
            # -dist ever requested would be baked in and later calls with a
            # different -dist would be ignored.
            my @alternatives = map {
                (my $template = $_) =~ s/-/$any_char/g;
                $template;
            } @profanity;

            my $regex = '(?k:' . join('|', @alternatives) . ')';
            #warn $regex;
            return $regex;
        },
        ;

1;

__DATA__
# relating to the penis:
big-dick
big-prick
super-prick
meaty-ball
deez-nut
big-n-hard
big-and-hard
chester-the-pussy-molester
hard-on
hot-cock

# terms referring to untruths
bull-shit
load-of-crap

# sexual act
cock-suck
suck-my-cock
blow-job
facial-fetish
fuck
suck-(cock|dick)
hand-job
jack-off
jerk-off
(lick|suck)-(cock|dick|nipples|tits)

# groin
crotch

# buttocks
ass-crack
butt-crack

# terms referring to an aggravating person
dick-head
prick-head
ass-hole
bastard

# nerd/wimp terms
punk-ass
pussy-ass
faggot
dick-less

# expletives (like bloody)
m(o|u)th(er|a|)-fuck
god-dam
shitty-ass

# racial terms
nigg?(a|er|uh)

# sexist terms
bitch
whore

# telling someone to get lost
suck-my-ass
hug-my-nuts
goto-hell
eat-shit
shit-eater
shit-head
turd-head
shit-face
suck-my-cock
fuck-off

# unpleasant bodily acts
eat-poop
smell-farts
half-assed
piss--face
piss--ass
poop--face
piss-drink
drink-piss

# vaginal
pussies
hot-puss
juicy-puss
smelly-puss
funky-puss
white-puss
black-puss
asian-puss
sex-puss
sex-clit
juic-clit

# things I have seen in my inbox :)
milk-my-breasts

__END__

=pod

=head1 NAME

Regexp::Common::profanity_us -- provide regexes for U.S.
profanity =head1 SYNOPSIS use Regexp::Common qw /profanity_us/; my $RE = $RE{profanity}{us}{normal}{label}{-keep}{-dist=>3}; while (<>) { warn "PROFANE" if /$RE/; } Or easier use Regexp::Profanity::US; $profane = profane ($string); @profane = profane_list($string); =head1 OVERVIEW Instead of a dry technical overview, I am going to explain the structure of this module based on its history. I consult at a company that generates customer leads primarily by having websites that attract people (e.g. lowering loan values, selling cars, buying real estate, etc.). For some reason we get more than our fair share of profane leads. For this reason I was told to write a profanity checker. For the data that I was dealing with, the profanity was most often in the email address or in the first or last name, so I naively started filtering profanity with a set of regexps for that sort of data. Note that both names and email addresses are unlike what you are reading now: they are not whitespace-separated text, but are instead labels. Therefore full support for profanity checking should work in 2 entirely different contexts: labels (email, names) and text (what you are reading). Because open-source is driven by demand and I have no need for detecting profanity in text, only C