#========================================================================
#
# Badger::Utils
#
# DESCRIPTION
#   Module implementing various useful utility functions.
#
# AUTHOR
#   Andy Wardley
#
#========================================================================

package Badger::Utils;

use strict;
use warnings;
use base 'Badger::Exporter';
use File::Path;
use Scalar::Util qw( blessed );
use Badger::Constants 'HASH PKG DELIMITER BLANK';
use Badger::Debug
    import  => ':dump',
    default => 0;
use overload;
use constant {
    UTILS  => 'Badger::Utils',
    # indices into the [class, file, loaded] entries held in $DELEGATES
    CLASS  => 0,
    FILE   => 1,
    LOADED => 2,
};

our $VERSION  = 0.01;
#our $DEBUG    = 0 unless defined $DEBUG;
our $ERROR    = '';
our $WARN     = sub { warn @_ };  # for testing - see t/core/utils.t
our $MESSAGES = { };
our $HELPERS  = {       # keep this compact in case we don't need to use it
    'Digest::MD5'       => 'md5 md5_hex md5_base64',
    'Scalar::Util'      => 'blessed dualvar isweak readonly refaddr reftype '
                         . 'tainted weaken isvstring looks_like_number '
                         . 'set_prototype',
    'List::Util'        => 'first max maxstr min minstr reduce shuffle sum',
    'List::MoreUtils'   => 'any all none notall true false firstidx '
                         . 'first_index lastidx last_index insert_after '
                         . 'insert_after_string apply after after_incl before '
                         . 'before_incl indexes firstval first_value lastval '
                         . 'last_value each_array each_arrayref pairwise '
                         . 'natatime mesh zip uniq minmax',
    'Hash::Util'        => 'lock_keys unlock_keys lock_value unlock_value '
                         . 'lock_hash unlock_hash hash_seed',
    'Badger::Timestamp' => 'TS Timestamp Now',
    'Badger::Logic'     => 'LOGIC Logic',
};
our $DELEGATES;         # fill this from $HELPERS on demand
our $RANDOM_NAME_LENGTH = 32;
our $TEXT_WRAP_WIDTH    = 78;

__PACKAGE__->export_any(qw(
    UTILS blessed is_object numlike textlike params self_params plural
    odd_params xprintf dotid random_name camel_case CamelCase wrap
    permute_fragments
));

__PACKAGE__->export_fail(\&_export_fail);

# looks_like_number() is such a mouthful.  I prefer numlike() to go with
# textlike()
*numlike = \&Scalar::Util::looks_like_number;

# it would be too confusing not to have this alias
*CamelCase = \&camel_case;


#-----------------------------------------------------------------------
# _export_fail($class, $target, $symbol, $more_symbols)
#
# Registered above via export_fail().  Looks $symbol up in the table of
# delegate functions built from $HELPERS; if found, loads the module that
# provides it and exports the function into $target.  Returns 1 if the
# symbol was handled, 0 if it is unknown.
#-----------------------------------------------------------------------

sub _export_fail {
    my ($class, $target, $symbol, $more_symbols) = @_;
    $DELEGATES ||= _expand_helpers($HELPERS);
    my $helper = $DELEGATES->{ $symbol } || return 0;

    unless ($helper->[LOADED]) {
        require $helper->[FILE];
        # remember that we've loaded it so the flag actually means something
        $helper->[LOADED] = 1;
    }

    # \&{"Package::name"} is exempt from strict refs
    $class->export_symbol($target, $symbol, \&{ $helper->[CLASS].PKG.$symbol });
    return 1;
}


#-----------------------------------------------------------------------
# _expand_helpers(\%helpers)
#
# Inverts { x => 'a b c' } into { a => [x, file, 0], b => [...], ... }
# where each value is a [CLASS, FILE, LOADED] triple.
#-----------------------------------------------------------------------

sub _expand_helpers {
    my $helpers   = shift;
    my $delegates = { };

    for my $name (keys %$helpers) {             # e.g. Scalar::Util
        my $file = module_file($name);          # e.g. Scalar/Util.pm
        for my $symbol (split(DELIMITER, $helpers->{ $name })) {
            $delegates->{ $symbol } = [$name, $file, 0];   # 0: not yet loaded
        }
    }

    return $delegates;
}


# is_object($class, $object)
#
# True if $object is a blessed reference which isa $class.

sub is_object($$) {
    blessed $_[1] && $_[1]->isa($_[0]);
}


# textlike($item)
#
# True if $item can be treated as text.

sub textlike($) {
    ! ref $_[0]                             # check if $_[0] is a non-reference
    || blessed $_[0]                        # or an object with an overloaded
    && overload::Method($_[0], '""');       # '""' stringification operator
}


# params(@args)
#
# Returns the first argument if it is a hash reference, otherwise folds
# all the arguments into a new hash reference.

sub params {
    # enable $DEBUG to track down calls to params() that pass an odd number
    # of arguments, typically when the rhs argument returns an empty list,
    # e.g. $obj->foo( x => this_returns_empty_list() )
    my @args = @_;
    local $SIG{__WARN__} = sub {
        odd_params(@args);
    } if DEBUG;

    @_ && ref $_[0] eq HASH ? shift : { @_ };
}


# self_params($self, @args)
#
# As params() but shifts a leading $self off the argument list first and
# returns the list ($self, $params).

sub self_params {
    my @args = @_;
    local $SIG{__WARN__} = sub {
        odd_params(@args);
    } if DEBUG;

    (shift, @_ && ref $_[0] eq HASH ? shift : { @_ });
}


# odd_params(@args)
#
# Reports (via $WARN) the arguments passed to params() / self_params()
# followed by a backtrace of the callers.  It is invoked from the
# __WARN__ handlers installed above, hence the caller() offsets.

sub odd_params {
    my $method = (caller(2))[3];
    $WARN->(
        "$method() called with an odd number of arguments: ",
        join(', ', map { defined $_ ? $_ : '' } @_),
        "\n"
    );
    my $i = 3;
    while (1) {
        my @info = caller($i);
        last unless @info;
        my ($pkg, $file, $line, $sub) = @info;
        $WARN->(
            sprintf(
                "%4s: Called from %s in %s at line %s\n",
                '#' . ($i++ - 2), $sub, $file, $line
            )
        );
    }
}


# plural($noun)
#
# Very naive pluralisation: ss/sh/ch/x => +es, consonant+y => ies,
# anything else ending in a letter (or underscore) other than 's' => +s.

sub plural {
    my $name = shift;

    if ($name =~ /(ss|sh|ch|x)$/) {
        $name .= 'es';
    }
    elsif ($name =~ s/([^aeiou])y$//) {
        $name .= $1.'ies';
    }
    elsif ($name =~ /([^s\d\W])$/) {
        $name .= 's';
    }
    return $name;
}


# module_file($name)
#
# Converts a module name (My::Module) to a relative file path
# (My/Module.pm).

sub module_file {
    my $file = shift;
    $file =~ s[::][/]g;
    $file .= '.pm';
    return $file;
}


# xprintf($format, @args)
#
# sprintf() wrapper which expands <n>, <n:fmt> and <n|text> tokens in
# $format into explicit-index sprintf conversions (%n$s, %n$fmt).  In
# the <n|text> form the text is only included if argument n is defined,
# with each '?' in the text replaced by the conversion for argument n.

sub xprintf {
    my $format = shift;
    my @args   = @_;
    $format =~
        s{ < (\d+)
             # A leading '%' is tolerated (and discarded) so that both
             # <2:.2f> and <2:%.2f> work.
             (?: : %? ( [\#\-+\ ]? [\w\.]+ ) )?
             (?: \| (.*?) )?
           >
         }
         {   defined $3
                ? _xprintf_ifdef(\@args, $1, $2, $3)
                : '%' . $1 . '$' . ($2 || 's')
         }egx;
    return sprintf($format, @args);
}


# _xprintf_ifdef(\@args, $n, $format, $text)
#
# Helper for xprintf().  Returns $text with each '?' replaced by the
# sprintf conversion for argument $n, or an empty string if argument $n
# is not defined.

sub _xprintf_ifdef {
    my ($args, $n, $format, $text) = @_;
    if (defined $args->[$n-1]) {
        $format = 's' unless defined $format;
        $format = '%' . $n . '$' . $format;
        $text =~ s/\?/$format/g;
        return $text;
    }
    else {
        return '';
    }
}


# dotid($text)
#
# Munges $text to canonical lower case and dotted form.

sub dotid {
    my $text = shift;
    $text =~ s/\W+/./g;     # e.g. Foo::Bar ==> Foo.Bar
    return lc $text;        # e.g. Foo.Bar  ==> foo.bar
}


# camel_case(@strings)
#
# Splits each argument on underscores, capitalises the first letter of
# each part and joins everything back together.

sub camel_case {
    join(
        BLANK,
        map {
            map { ucfirst $_ }
            split '_'
        }
        @_
    );
}


# random_name($length, @data)
#
# Returns a string of $length hex characters (default:
# $RANDOM_NAME_LENGTH) built from MD5 digests of the time, a random
# number, the process id, a fresh reference and any extra @data.

sub random_name {
    my $length = shift || $RANDOM_NAME_LENGTH;
    my $name   = '';
    require Digest::MD5;

    while (length $name < $length) {
        $name .= Digest::MD5::md5_hex(
            time(), rand(), $$, { }, @_
        );
    }
    return substr($name, 0, $length);
}


# alternates($text)
#
# Splits the inside of a (...) fragment into a list reference of
# alternates.  Used by permute_fragments().

sub alternates {
    my $text = shift;
    return [
        $text =~ /\|/
            ? split(qr<\|>, $text, -1)  # alternates: (foo|bar) as ['foo', 'bar']
            : ('', $text)               # optional (foo) as (|foo) as ['', 'foo']
    ];
}


# wrap($text, $width, $indent)
#
# Splits $text into whitespace-delimited words and rejoins them into
# lines of at most $width characters (default: $TEXT_WRAP_WIDTH).  The
# literal two-character word '\n' forces a line break.  Lines are joined
# with a newline followed by $indent spaces, so the indent applies to
# every line except the first.

sub wrap {
    my $text   = shift;
    my $width  = shift || $TEXT_WRAP_WIDTH;
    my $indent = shift || 0;
    my @words  = split(/\s+/, $text);
    my (@lines, @line);
    my $total  = 0;

    while (@words) {
        my $word   = $words[0];
        my $length = length $word;

        # split() can yield a leading empty field; skip it
        unless ($length) {
            shift @words;
            next;
        }

        if ($word eq '\n') {
            # magic word: discard it and force a break
            shift @words;
            push(@lines, join(" ", @line));
            @line  = ();
            $total = 0;
        }
        elsif (@line && $total + $length > $width) {
            # The word doesn't fit: flush the current line and retry the
            # word on a fresh one.  The @line guard means a word longer
            # than $width is placed on a line of its own rather than
            # looping forever emitting empty lines.
            push(@lines, join(" ", @line));
            @line  = ();
            $total = 0;
        }
        else {
            $total += $length + 1;      # account for spaces joining words
            push(@line, shift @words);
        }
    }
    push(@lines, join(" ", @line)) if @line;

    return join(
        "\n" . (' ' x $indent),
        @lines
    );
}


# permute_fragments($text)
#
# Expands optional (a) and alternate (a|b|c) fragments embedded in $text
# into all permutations.  Returns a list in list context or a list
# reference in scalar context.

sub permute_fragments {
    my $input = shift;
    my (@frags, @outputs);

    # Lookup all the (a) optional fragments and (a|b|c) alternate fragments
    # and replace them with %s.  This gives us an sprintf format that we can
    # later use to re-fill the fragment slots.  Meanwhile create a list of
    # @frags with each item corresponding to a (...) fragment which is
    # represented by a list reference containing the alternates.  e.g. the
    # input string 'Fo(o|p) Ba(r|z)' generates @frags as ( ['o','p'],
    # ['r','z'] ), with $format set to 'Fo%s Ba%s'.  We treat (foo) as sugar
    # for (|foo), so that 'Template(X)' is permuted as ('Template',
    # 'TemplateX'), for example.
    #
    # split() with a capturing group returns literal text and fragment
    # bodies alternately.  Literal '%' characters are doubled so that
    # sprintf() reproduces them verbatim instead of treating them as
    # conversions.
    my @parts  = split(/ \( ( .*? ) \) /x, $input, -1);
    my $format = '';

    while (@parts) {
        my $literal = shift @parts;
        $literal =~ s/%/%%/g;
        $format .= $literal;
        if (@parts) {
            push(@frags, alternates(shift @parts));
            $format .= '%s';
        }
    }

    # If there were any fragments then $format will contain one or more
    # '%s' tokens and @frags will have the same number of list refs in it,
    # one for each fragment.  To iterate across all permutations of the
    # fragment values, we calculate the product P of the sizes of all the
    # lists in @frags and loop from 0 to P-1.  Then we use a div and a mod
    # to get the right value for each fragment, for each iteration.  We
    # divide $n by the product of all fragment lists to the right of the
    # current fragment and mod it by the size of the current fragment list.
    # It's effectively counting with a different base for each column.
    # e.g. consider 3 fragments with 7, 3, and 5 values respectively
    #   [7]           [3]          [5]         P = 7 * 3 * 5 = 105
    #   [n / 15 % 7]  [n / 5 % 3]  [n % 5]     for 0 <= n < P

    if (@frags) {
        my $product = 1;
        $product *= @$_ for @frags;
        for (my $n = 0; $n < $product; $n++) {
            my $divisor = 1;
            my @args = reverse map {
                my $item = $_->[ $n / $divisor % @$_ ];
                $divisor *= @$_;
                $item;
            } reverse @frags;   # working backwards from right to left
            push(@outputs, sprintf($format, @args));
        }
    }
    else {
        # no fragments: the input is passed through untouched
        push(@outputs, $input);
    }
    return wantarray
        ?  @outputs
        : \@outputs;
}


# _debug(@message)
#
# Prints the arguments to STDERR.

sub _debug {
    print STDERR @_;
}

1;

__END__

=head1 NAME

Badger::Utils - various utility functions

=head1 SYNOPSIS

    use Badger::Utils 'blessed params';

    sub example {
        my $self   = shift;
        my $params = params(@_);

        if (blessed $self) {
            print "self is blessed\n";
        }
    }

=head1 DESCRIPTION

This module implements a number of utility functions.  It also provides
access to all of the utility functions in L<Scalar::Util>, L<List::Util>,
L<List::MoreUtils>, L<Hash::Util> and L<Digest::MD5> as a convenience.

    use Badger::Utils 'blessed reftype first max any all lock_hash md5_hex';

The single line of code shown here will import C<blessed> and C<reftype> from
L<Scalar::Util>, C<first> and C<max> from L<List::Util>, C<any> and C<all>
from L<List::MoreUtils>, C<lock_hash> from L<Hash::Util>, and C<md5_hex> from
L<Digest::MD5>.

These modules are loaded on demand so there's no overhead incurred if you
don't use them (other than a lookup table so we know where to find them).

=head1 EXPORTABLE FUNCTIONS

C<Badger::Utils> can automatically load and export functions defined in the
L<Scalar::Util>, L<List::Util>, L<List::MoreUtils>, L<Hash::Util> and
L<Digest::MD5> Perl modules.

It also does the same for functions and constants defined in the Badger
modules L<Badger::Timestamp> (C<TS>, C<Timestamp> and C<Now>) and
L<Badger::Logic> (C<LOGIC> and C<Logic>).

For example:

    use Badger::Utils 'Now';
    print Now->year;            # prints the current year

The following exportable functions are also defined in C<Badger::Utils>

=head2 UTILS

Exports a C<UTILS> constant which contains the name of the C<Badger::Utils>
class.

=head2 is_object($class,$object)

Returns true if the C<$object> is a blessed reference which isa C<$class>.

    use Badger::Filesystem 'FS';
    use Badger::Utils 'is_object';

    if (is_object( FS => $object )) {       # FS == Badger::Filesystem
        print $object, ' isa ', FS, "\n";
    }

=head2 textlike($item)

Returns true if C<$item> is a non-reference scalar or an object that
has an overloaded stringification operator.

    use Badger::Filesystem 'File';
    use Badger::Utils 'textlike';

    # Badger::Filesystem::File objects have overloaded string operator
    my $file = File('example.txt');
    print $file;                                # example.txt
    print textlike $file ? 'ok' : 'not ok';     # ok

=head2 numlike($item)

This is an alias to the C<looks_like_number> function defined in
L<Scalar::Util>.

=head2 params(@args)

Function to coerce a list of named parameters to a hash array reference.  If
the first argument is a reference to a hash array then it is returned.
Otherwise the arguments are folded into a hash reference.

    use Badger::Utils 'params';

    params({ a => 10 });            # { a => 10 }
    params( a => 10 );              # { a => 10 }

Pro Tip: If you're getting warnings about an "Odd number of elements in
anonymous hash" then try enabling debugging in C<Badger::Utils>.  To do this,
add the following to the start of your program before you've loaded
C<Badger::Utils>:

    use Badger::Debug
        modules => 'Badger::Utils';

When debugging is enabled in C<Badger::Utils> you'll get a full stack
backtrace showing you where the subroutine was called from.  e.g.

    Badger::Utils::self_params() called with an odd number of arguments:
    #1: Called from Foo::bar in /path/to/Foo/Bar.pm at line 210
    #2: Called from Wam::bam in /path/to/Wam/Bam.pm at line 420
    #3: Called from main in /path/to/your/script.pl at line 217

=head2 self_params(@args)

Similar to L<params()> but also expects a C<$self> reference at the start of
the argument list.

    use Badger::Utils 'self_params';

    sub example {
        my ($self, $params) = self_params(@_);
        # do something...
    }

If you enable debugging in C<Badger::Utils> then you'll get a stack backtrace
in the event of an odd number of parameters being passed to this function.
See L<params()> for further details.

=head2 odd_params(@_)

This is an internal function used by L<params()> and L<self_params()> to
report any attempt to pass an odd number of arguments to either of them.
It can be enabled by setting C<$Badger::Utils::DEBUG> to a true value.

    use Badger::Utils 'params';
    $Badger::Utils::DEBUG = 1;

    my $hash = params( foo => 10, 20 );    # oops!

The above code will raise a warning showing the arguments passed and a
stack backtrace, allowing you to easily track down and fix the offending
code.  Apart from obvious typos like the above, this is most likely to
happen if you call a function or method that returns an empty list.  e.g.

    params(
        foo => 10,
        bar => get_the_bar_value(),
    );

If C<get_the_bar_value()> returns an empty list then you'll end up with an
odd number of elements being passed to C<params()>.  You can correct this
by providing C<undef> as an alternative value.  e.g.

    params(
        foo => 10,
        bar => get_the_bar_value() || undef,
    );

=head2 plural($noun)

The function makes a very naive attempt at pluralising the singular noun word
passed as an argument.

If the C<$noun> word ends in C<ss>, C<sh>, C<ch> or C<x> then C<es> will be
added to the end of it.

    print plural('class');      # classes
    print plural('hash');       # hashes
    print plural('patch');      # patches
    print plural('box');        # boxes

If it ends in C<y> then it will be replaced with C<ies>.

    print plural('party');      # parties

In all other cases, C<s> will be added to the end of the word.

    print plural('device');     # devices

It will fail miserably on many common words.

    print plural('woman');      # womans     FAIL!
    print plural('child');      # childs     FAIL!
    print plural('foot');       # foots      FAIL!

This function should I<only> be used in cases where the singular noun is known
in advance and has a regular form that can be pluralised correctly by the
algorithm described above.  For example, the L<Badger::Factory> module allows
you to specify C<$ITEM> and C<$ITEMS> package variables to provide the
singular and plural names of the items that the factory manages.

    our $ITEM  = 'person';
    our $ITEMS = 'people';

If the singular noun is sufficiently regular then the C<$ITEMS> can be
omitted and the C<plural()> function will be used.

    our $ITEM  = 'codec';       # $ITEMS defaults to 'codecs'

In this case we know that C<codec> will pluralise correctly to C<codecs> and
can safely leave C<$ITEMS> undefined.

For more robust pluralisation of English words, you should use the
L<Lingua::EN::Inflect> module by Damian Conway.
For further information on the difficulties of correctly pluralising English,
and details of the implementation of L<Lingua::EN::Inflect>, see Damian's
paper "An Algorithmic Approach to English Pluralization" at
L<http://www.csse.monash.edu.au/~damian/papers/HTML/Plurals.html>

=head2 module_file($name)

Returns the module name passed as an argument as a relative filesystem path
suitable for feeding into C<require()>

    print module_file('My::Module');     # My/Module.pm

=head2 camel_case($string) / CamelCase($string)

Converts a lower case string where words are separated by underscores (e.g.
C<camel_case>) into CamelCase where each word is capitalised and words
are joined together (e.g. C<CamelCase>).

According to Perl convention (and personal preference), we use the lower case
form wherever possible.  However, Perl's convention also dictates that module
names should be in CamelCase.  This function performs that conversion.

=head2 wrap($text, $width, $indent)

Simple subroutine to wrap C<$text> to a fixed C<$width>, applying an optional
indent of C<$indent> spaces.  It uses a trivial algorithm which splits the
text into words, then rejoins them as lines.  It has an additional hack to
recognise the literal sequence '\n' as a magical word indicating a forced
newline break.  It must be specified as a separate whitespace delimited word.

    print wrap('Foo \n Bar');

If anyone knows how to make L<Text::Wrap> handle this, or knows of a better
solution then please let me know.

=head2 dotid($text)

The function returns a lower case representation of the text passed as
an argument with all non-word character sequences replaced with dots.

    print dotid('Foo::Bar');            # foo.bar

=head2 xprintf($format,@args)

A wrapper around C<sprintf()> which provides some syntactic sugar for
embedding positional parameters.

    xprintf('The <2> sat on the <1>', 'mat', 'cat');
    xprintf('The <1> costs <2:%.2f>', 'widget', 11.99);

=head2 random_name($length,@data)

Generates a random name of maximum length C<$length> using any additional
seeding data passed as C<@data>.  If C<$length> is undefined then the default
value in C<$RANDOM_NAME_LENGTH> (32) is used.

    my $name = random_name();
    my $name = random_name(64);

=head2 permute_fragments($text)

This function permutes any optional or alternate fragments embedded in
parentheses.  For example, C<Badger(X)> is permuted as (C<Badger>,
C<BadgerX>) and C<Badger(X|Y)> is permuted as (C<BadgerX>, C<BadgerY>).

    permute_fragments('Badger(X)');         # Badger, BadgerX
    permute_fragments('Badger(X|Y)');       # BadgerX, BadgerY

Multiple fragments may be embedded.  They are expanded in order from left to
right, with the rightmost fragments changing most often.

    permute_fragments('A(1|2):B(3|4)')      # A1:B3, A1:B4, A2:B3, A2:B4

=head2 alternates($text)

This function is used internally by the L<permute_fragments()> function.  It
returns a reference to a list containing the alternates split from C<$text>.

    alternates('foo|bar');          # returns ['foo','bar']
    alternates('foo');              # returns ['','foo']

If the C<$text> doesn't contain the C<|> character then it is assumed to be
an optional item.  A list reference is returned containing the empty string
as the first element and the original C<$text> string as the second.

=head1 AUTHOR

Andy Wardley L<http://wardley.org/>

=head1 COPYRIGHT

Copyright (C) 1996-2009 Andy Wardley.  All Rights Reserved.

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut

# Local Variables:
# mode: perl
# perl-indent-level: 4
# indent-tabs-mode: nil
# End:
#
# vim: expandtab shiftwidth=4: