package Tie::FileLRUCache; use strict; use Class::ParmList qw (simple_parms parse_parms); use Digest::SHA1 qw(sha1_hex); use Fcntl qw (:flock); use File::Spec; use Storable qw (nstore nfreeze retrieve); use Symbol qw (gensym); use vars qw ($VERSION); BEGIN { $VERSION = "1.05"; } ########################################################################### =head1 NAME Tie::FileLRUCache - A lightweight but robust filesystem based persistent LRU cache =head1 CHANGES 1.05 2005.09.14 - Changes to pod tests to make them more CPANTS friendly. No functional changes. 1.04 2005.09.13 - Removed use of 'warnings' to fix compatibility with Perl 5.005. Fixed minor typographical errors in documentation. 1.03 2005.09.10 - Changed build test to handle difference in treatment of hashes in scalar contect between 5.6.x and 5.8.x versions of Perl that caused a test failure under Perl 5.6.x. 1.02 2005.09.08 - Added build tests. Major code cleanup. Improved platform portability. Added and documented 'cache_dir', 'keep_last' and 'number_of_entries' methods. Added Module::Build support. 1.01 1999.12.09 - Added 'detainting' to cache management code. =head1 SYNOPSIS =head2 OBJECT INTERFACE use Tie::FileLRUCache; my $cache = Tie::FileLRUCache->new({ -cache_dir => $directory, -keep_last => 100 }); # Inserting value into LRU cache using '-key' $cache->update({ -key => $key, -value => $value }); # Inserting value into LRU cache using '-cache_key' my $cache_key = $cache->make_cache_key({ -key => $key }); $cache->update({ -cache_key => $cache_key, -value => $value }); # Checking LRU cache my ($in_cache,$value) = $cache->check({ -key => $key }); if ($in_cache) { return $value; } # Not in cache - do something else # Checking LRU cache with speed up hack for objects, hashes, arrays etc used as keys my $cache_key = $cache->make_cache_key({ -key => $something }); my ($in_cache,$value) = $cache->check({ -cache_key => $cache_key }); if ($in_cache) { return $value; } # Not in cache - do something else # Deleting a key and its value from the cache $cache->delete({ -key => $key }); # Clearing LRU cache $cache->clear; =head2 TIED INTERFACE use Tie::FileLRUCache; [$X =] tie %hash, 'Tie::FileLRUCache', $cache_dir, $keep_last_n; # Adding a key/value to the cache $hash{$key} = $value; # Checking the cache if (not exists $hash{$key}) {; # No match . . . } else { my $value = $hash{$key}; . . . } # Removing a value from the cache; delete $hash{$key}; # Clearing the cache %hash = (); Note: Iteration over the cache (each, keys, values) is _NOT_ supported. =cut =head1 DESCRIPTION Provides a lightweight persistent filesystem based LRU cache. It uses the 'last accessed' timestamp generated by the file system to determine the 'oldest' cache entry and discards the oldest cache entries when needed to stay under the -keep_last limit. If you store thing very fast (such that many entries receive the same time stamp), it is essentially a coin toss which entry within a single timestamped second gets purged from the cache to make room for new ones. It is not designed to handle huge numbers of cached items. It is probably unwise to set the 'keep_last' higher than around 100. =cut =head1 OBJECT METHODS =cut ####################################################################### =over 4 =item new({[ -cache_dir => $cache_directory] [, -keep_last => $keep_last_n ] }); Creates and optionally initializes a Tie::FileLRUCache object: Example: my $cache = Tie::FileLRUCache->new({ -cache_dir => '/tmp/testing', -keep_last => 100, }); The default cache size is 100 entries unless specified. =back =cut sub new { my $proto = shift; my $package = __PACKAGE__; my $class = ref ($proto) || $proto || $package; my $self = bless {}, $class; my $parms = parse_parms({ -parms => \@_, -legal => [-cache_dir, -keep_last], -required => [], -defaults => { -keep_last => 100, -cache_dir => undef, }, }); if (not defined $parms) { my $error_message = Class::ParmList->error; require Carp; Carp::croak ($package . "::new() - Parameter error '$error_message'\n"); } # Save settings my ($cache_dir,$keep_last) = $parms->get(-cache_dir,-keep_last); $self->cache_dir($cache_dir); $self->keep_last($keep_last); $self; } ####################################################################### =over 4 =item check({ -key => $key }); Reads the cache for the key. Returns two values: $cache_hit (true if a hit was found, false if not) $value (the cached value, undef if no hit) Examples: my ($cache_hit,$value) = $cache->check({ -key => $key }); my ($cache_hit,$value) = $cache->check({ -cache_key => $cache_key }); The '-key' form is used when you just want to use a raw key. It can use blessed objects, hash refs, scalars, or array refs as keys. The more complex structures take a speed penalty for computing a canonical form. You can minimize this penalty by using the '-cache_key' form instead. The '-cache_key' form is used for performance reasons when using keys such as complex blessed objects or hashes as a key. The -cache_key is obtained with a call to 'make_cache_key'. It is legal to mix -cache_key and -key based calls - they are cross-compatible. =back =cut sub check { my $self = shift; my $package = __PACKAGE__; if (not wantarray) { require Carp; Carp::croak ($package . "::check() - Called in a scalar context\n"); } my $parms = parse_parms({ -parms => \@_, -legal => [-cache_key, -key], -required => [], -defaults => {}, }); if (not defined $parms) { my $error_message = Class::ParmList->error; require Carp; Carp::croak ($package . "::check() - $error_message\n"); } my ($key,$cache_key) = $parms->get(-key,-cache_key); if (not (defined ($key) or defined ($cache_key))) { require Carp; Carp::croak ($package . "::check() - Called without either a -key or -cache_key\n"); } my $cache_dir = $self->cache_dir; unless (defined $cache_dir) { require Carp; Carp::croak ($package . "::check - No cache directory set.\n"); } # Ok. Set our lock on the cache $self->_lock_cache; # Generate the cache_key (done by making a cannonical # network order Storable string out of the key) if we # don't already have it unless (defined $cache_key) { $cache_key = $self->make_cache_key({ -key => $key }); } # Generate a unique cache file name by taking a SHA1 hash of $cache_key my $cache_hash = lc(sha1_hex($cache_key)); $cache_hash =~ s/\s//gs; my ($untainted_cache_hash) = $cache_hash =~ m/^([a-f0-9]+)$/s; my $cache_file = File::Spec->catfile($cache_dir, "cl_$untainted_cache_hash"); # Check if there is a cache entry for this key unless (-e $cache_file) { $self->_unlock_cache; return (0,undef); } # Yes. Get it. And update the last modified and last accessed dates. my $entry; eval { $entry = retrieve($cache_file); my $now = time; utime ($now, $now, $cache_file); }; if ($@) { my $error = $@; $self->_unlock_cache; require Carp; Carp::croak($package . "::check - Error while retrieving cache entry file '$cache_file': $error\n"); } unless (defined $entry) { my $error = $!; $self->_unlock_cache; require Carp; Carp::croak($package . "::update - Failed to retrieve cache entry file '$cache_file': $error\n"); } # Release the lock. $self->_unlock_cache; my $cache_value = $entry->{'-value'}; # Give them their cupie doll return (1, $cache_value); } ####################################################################### =over 4 =item make_cache_key({ -key => $key }); Generates a cache key by canonicalizing a passed key as a network ordered canonical Storable string. Example: my $cache_key = $cache->make_cache_key({ -key => $key }); =back =cut sub make_cache_key { my $self = shift; my $package = __PACKAGE__; my $parms = parse_parms({ -parms => \@_, -legal => [], -required => ['-key'], -defaults => {}, }); unless (defined $parms) { my $error_message = Class::ParmList->error; require Carp; Carp::croak ($package . "::make_cache_key() - $error_message\n"); } my ($key) = $parms->get(-key); my $temp = $Storable::canonical; my $result = nfreeze(\$key); $Storable::canonical = $temp; if (not $result) { my $error = $!; require Carp; Carp::croak ($package . "::check() - Unable to serialize passed -key value: $error"); } return $result; } ####################################################################### =over 4 =item clear; Completely clears the cache of all cache entries. =back =cut sub clear { my $self = shift; my $package = __PACKAGE__; my $cache_dir = $self->cache_dir; unless (defined $cache_dir) { require Carp; Carp::croak ($package . "::clear - No cache directory set.\n"); } if ($cache_dir eq '') { require Carp; Carp::croak ($package . "::clear - Cannot use root directory as cache directory.\n"); } if ((-e $cache_dir) and (not -d _)) { require Carp; Carp::croak ($package . "::clear - '$cache_dir' already exists and is not a directory.\n"); } $self->_lock_cache; my $cache_dir_fh = gensym; if (not opendir ($cache_dir_fh, $cache_dir)) { my $error = $!; $self->_unlock_cache; require Carp; Carp::croak ($package . "::clear - Failed to open directory '$cache_dir' for reading: $error\n"); } my @raw_directory_list = readdir($cache_dir_fh); unless (closedir ($cache_dir_fh)) { my $error = $!; $self->_unlock_cache; require Carp; Carp::croak ($package . "::clear - Failed to close directory '$cache_dir': $error\n"); } # Untaint the filenames, convert them to absolute file paths and unlink them. my @raw_files_list = grep(/^(cacheline_[a-zA-Z0-9]{1,50}|cl_[a-zA-Z0-9]{1,50})$/s, @raw_directory_list); my @file_list = (); foreach my $item (@raw_files_list) { my ($filename) = $item =~ m/^(.*)$/s; my $file_path = File::Spec->catfile($cache_dir, $filename); unless (unlink $file_path) { my $error = $!; $self->_unlock_cache; require Carp; Carp::croak($package . "::clear - Failed to unlink file '$file_path': $error"); } } $self->_unlock_cache; return; } ####################################################################### =over 4 =item update({ [-key => $key,] [-cache_key => $cache_key, ], -value => $value [, -keep_last => $keep_last_n ] }); Updates the Least Recently Used (LRU) cache for the specified key with the passed value. '-keep_last' is optional after the first access to a dataset. It will use the I 'keep_last' used if not specified. It is legal to use ordinary scalars, hash references, or array references as keys as well as objects as -keys or -values. Basically, anything that Storable can reproducibly serialize can be used. Examples: $cache->update({ -key => $key, -value => $value }); $cache->update({ -key => $key, -value => $value, -keep_last => 100}); my $cache_key = $cache->make_cache_key({ -key => $key }); $cache->update({ -cache_key => $cache_key, -value => $value }); my $cache_key = $cache->make_cache_key({ -key => $key }); $cache->update({ -cache_key => $cache_key, -value => $value, -keep_last => 50 }); -cache_key is assumed to be a simple scalar value for use as a key. -key can be pretty much anything Storable can successfully and reproducibly serialize. One or the other I be passed. =back =cut sub update { my $self = shift; my $package = __PACKAGE__; my $parms = parse_parms({ -parms => \@_, -legal => ['-keep_last', '-key', '-cache_key'], -required => ['-value'], -defaults => {'-keep_last' => $self->keep_last}, }); unless (defined $parms) { my $error_message = Class::ParmList->error; require Carp; Carp::croak ($package . "::update() - $error_message\n"); } my ($key,$cache_key,$value,$keep_last) = $parms->get('-key', '-cache_key', '-value', '-keep_last'); unless (defined ($key) or defined ($cache_key)) { require Carp; Carp::croak ($package . "::update() - Called without either a -key or -cache_key. At least one of them must be passed.\n"); } my ($cache_dir) = $self->cache_dir; unless (defined $cache_dir) { require Carp; Carp::croak ($package . "::update - No cache directory set.\n"); } # Generate the cache_key (done by making a cannonical # network order Storable string out of the key) if we # don't already have one. unless (defined $cache_key) { $cache_key = $self->make_cache_key({ -key => $key }); } # Generate a unique cache file # name by taking a SHA1 hash of # $cache_key and dumping it as hex my $cache_hash = lc(sha1_hex($cache_key)); $cache_hash =~ s/\s//gs; my ($untainted_cache_hash) = $cache_hash =~ m/^([a-f0-9]+)$/s; my $cache_file = File::Spec->catfile($cache_dir, "cl_$untainted_cache_hash"); # Serialize the $value for storage my $entry = { -value => $value }; # Set our lock on the cache directory $self->_lock_cache; ########## # Store the cache entry. my $result; eval { $result = nstore($entry,$cache_file); }; if ($@) { my $error = $@; $self->_unlock_cache; require Carp; Carp::croak($package . "::update - Error while saving cache entry file '$cache_file': $error"); } unless (defined $result) { my $error = $!; $self->_unlock_cache; require Carp; Carp::croak($package . "::update - Error while saving cache entry file '$cache_file': $error\n"); } ######################################## # Check if we need to purge old entries my $cache_dir_fh = gensym; unless (opendir ($cache_dir_fh, $cache_dir)) { my $error = $!; $self->_unlock_cache; require Carp; Carp::croak ($package . "::update - Failed to open directory '$cache_dir' for reading: $error\n"); } my @raw_file_list = grep(/^(cacheline_[a-fA-F0-9]{1,50}|cl_[a-fA-F0-9]{1,50})$/s,readdir($cache_dir_fh)); unless (closedir ($cache_dir_fh)) { my $error = $!; $self->_unlock_cache; require Carp; Carp::croak ($package . "::update - Failed to close directory '$cache_dir': $error\n"); } # Untainting the filenames and converting them to absolute file paths. my @file_list = (); foreach my $item (@raw_file_list) { my ($filename) = $item =~ m/^(.*)$/s; my $file_path = File::Spec->catfile($cache_dir, $filename); push (@file_list,$file_path); } my $n_files = $#file_list + 1; # No problems. All done. if ($n_files <= $keep_last) { $self->_unlock_cache; return; } # Too many entries. Delete the excess entries (usually only one) my %file_last_access = (); foreach my $file (@file_list) { my $last_accessed = (stat($file))[9]; $file_last_access{$file} = $last_accessed; } my @sorted_file_list = sort { $file_last_access{$b} <=> $file_last_access{$a} } @file_list; while (($n_files > $keep_last) and ($n_files > 0)) { $n_files--; my $pruned_file = $sorted_file_list[$n_files]; unless (unlink $pruned_file) { my $error = $!; $self->_unlock_cache; require Carp; Carp::croak($package . "::update - Failed to unlink file '$pruned_file': $error"); } } # Release our lock and return $self->_unlock_cache; return; } ####################################################################### =over 4 =item delete({ -key => $key }); Forces the deletion of a specific key from the cache. Example: $cache->delete({ -key => $key }); =back =cut sub delete { my $self = shift; my $package = __PACKAGE__; my $parms = parse_parms({ -parms => \@_, -legal => [-key, -cache_key], -required => [], -defaults => {}, }); if (not defined $parms) { my $error_message = Class::ParmList->error; require Carp; Carp::croak ($package . "::delete() - $error_message\n"); } my ($key,$cache_key) = $parms->get(-key, -cache_key); if (not (defined ($key) or defined ($cache_key))) { require Carp; Carp::croak ($package . "::delete() - Called without either a -key or -cache_key\n"); } my $cache_dir = $self->cache_dir; unless (defined $cache_dir) { require Carp; Carp::croak ($package . "::delete - No cache directory set.\n"); } if ($cache_dir eq '') { require Carp; Carp::croak ($package . "::delete - Cannot use root directory as cache directory.\n"); } # Generate the cache_key (done by making a cannonical # network order Storable string out of the key) if we # don't already have it if (not defined $cache_key) { $cache_key = $self->make_cache_key({ -key => $key }); } # Generate a unique cache file # name by taking a SHA1 hash of # $cache_key my $cache_hash = lc(sha1_hex($cache_key)); $cache_hash =~ s/\s//gs; my ($untainted_cache_hash) = $cache_hash =~ m/^([a-f0-9]+)$/s; my $cache_file = File::Spec->catfile($cache_dir, "cl_$untainted_cache_hash"); # Ok. Set our lock on the cache directory $self->_lock_cache; # If it is in the cache, remove it if ((-e $cache_file) and (not unlink $cache_file)) { my $error = $!; $self->_unlock_cache; require Carp; Carp::croak($package . "::delete - Failed to unlink file '$cache_file': $error"); } # Release our lock and return $self->_unlock_cache; } ####################################################################### =over 4 =item cache_dir([$cache_directory_path]); Get/Set accessor for the cache directory path. Ex. my $cache_directory = $cache->cache_dir; $cache->cache_dir($cache_directory); =back =cut sub cache_dir { return shift->_property('cache_dir', @_); } ####################################################################### =over 4 =item keep_last([$keep_last_n]); Get/Set accessor for the keep last N setting. Ex. my $n_last = $cache->keep_last; $cache->keep_last(20); =back =cut sub keep_last { return shift->_property('keep_last', @_); } ####################################################################### =over 4 =item number_of_entries; Returns the current number of entries in the cache. =back =cut sub number_of_entries { my $self = shift; my $package = __PACKAGE__; my $cache_dir_fh = gensym; my $cache_dir = $self->cache_dir; unless (defined $cache_dir) { require Carp; Carp::croak ($package . "::delete - No cache directory set.\n"); } if ($cache_dir eq '') { require Carp; Carp::croak ($package . "::delete - Cannot use root directory as cache directory.\n"); } unless (opendir ($cache_dir_fh, $cache_dir)) { my $error = $!; require Carp; Carp::croak ($package . "::update - Failed to open directory '$cache_dir' for reading: $error\n"); } my @raw_file_list = grep(/^(cacheline_[a-fA-F0-9]{1,50}|cl_[a-fA-F0-9]{1,50})$/s,readdir($cache_dir_fh)); unless (closedir ($cache_dir_fh)) { my $error = $!; require Carp; Carp::croak ($package . "::update - Failed to close directory '$cache_dir': $error\n"); } my $n_entries = $#raw_file_list + 1; return $n_entries; } ####################################################################### # # # PRIVATE METHODS # # # # Internals. Documented for maintainance reasons only. # # Do not use these methods from outside this module. # # # ####################################################################### ####################################################################### # _cache_lock_fh([$fh]); # # Get/Set accessor used to store a reference to the filehandle # used for locking. sub _cache_lock_fh { return shift->_property('_cache_lock_fh', @_); } ####################################################################### # _lock_cache; # # Obtains a lock on the 'cache.lock' file for this LRU cache. # # Example: # $self->_lock_cache; # # This will create the 'cache.lock' file if it does not already exist, # creating any intermediate directories as needed. # # It also writes the current PID to the lock file. sub _lock_cache { my $self = shift; my $package = __PACKAGE__; my $cache_dir = $self->cache_dir; if (not defined $cache_dir) { require Carp; Carp::croak ($package . "::_lock_cache - No cache directory set.\n"); } if ($cache_dir eq '') { require Carp; Carp::croak ($package . "::_lock_cache - Cannot use root directory as cache directory.\n"); } if ((-e $cache_dir) and (not -d _)) { require Carp; Carp::croak ($package . "::_lock_cache - '$cache_dir' already exists and is not a directory.\n"); } if (not -e $cache_dir) { eval { require File::Path; File::Path::mkpath ($cache_dir); }; if ($@) { my $error = $@; require Carp; Carp::croak ($package . "::_lock_cache - unable to create directory '$cache_dir': $error"); } } if (not ((-e $cache_dir) and (-d _))) { require Carp; Carp::croak ($package . "::_lock_cache - Unable to create directory '$cache_dir'\n"); } my $document_name = File::Spec->catfile($cache_dir,'.cache.lock'); my $cache_lock_fh = gensym; unless (open ($cache_lock_fh,">>$document_name")) { my $error = $!; require Carp; Carp::croak ($package . "::_lock_cache - Unable to open '$document_name': $error\n"); } my $lock_timeout = 100; while (not flock($cache_lock_fh, LOCK_EX()|LOCK_NB())) { $lock_timeout--; select (undef,undef,undef,0.1); if ($lock_timeout == 0) { my $error = $!; require Carp; Carp::croak ($package . "::_lock_cache - Unable to get an exclusive lock on '$document_name': $error\n"); } } my $fh = select ($cache_lock_fh); $|++; select ($fh); unless (truncate ($cache_lock_fh, 0)) { my $error = $!; require Carp; Carp::croak ($package . "::_lock_cache - Unable to truncate '$document_name': $error\n"); } print $cache_lock_fh "$$\n"; $self->_cache_lock_fh($cache_lock_fh); return; } ####################################################################### # _unlock_cache; # # Release a lock on the 'cache.lock' file for this LRU cache. # # Example: # $self->_unlock_cache; sub _unlock_cache { my $self = shift; my $package = __PACKAGE__; my $cache_lock_fh = $self->_cache_lock_fh; unless (truncate ($cache_lock_fh,0)) { my $error = $!; require Carp; Carp::croak ($package . "::_lock_cache - Unable to truncate cache.lock file: $error\n"); } unless (close ($cache_lock_fh)) { my $error = $!; require Carp; Carp::croak ($package . "::_unlock_cache - Error while closing cache.lock file: $error\n"); } return; } #################################################################### # _property('property_name' => $property_value) # # get/set base accessor for property values sub _property { my $self = shift; my $property = shift; my $package = __PACKAGE__; if (0 == @_) { my $output = $self->{$package}->{$property}; return $output; } elsif (1 == @_) { my $input = shift; $self->{$package}->{$property} = $input; return; } else { require Carp; Carp::croak("Bad calling parameters to ${package}::${property}()\n"); } } #################################################################### sub TIEHASH { my $proto = shift; my $package = __PACKAGE__; my $class = ref ($proto) || $proto || $package; my $self = bless {}, $class; my ($cache_dir,$keep_last) = @_; $keep_last = 100 unless (defined $keep_last); unless (defined ($cache_dir) and ($cache_dir ne '')) { require Carp; Carp::croak($package . ": Missing required parameter (cache_dir)\n"); } $self->cache_dir($cache_dir); $self->keep_last($keep_last); return $self; } ####################################################################### sub STORE { my $self = shift; my ($key,$value) = @_; if (ref(\$key) eq 'SCALAR') { $self->update({ -cache_key => $key, -value => $value }); } else { $self->update({ -key => $key, -value => $value }); } } ####################################################################### sub FETCH { my $self = shift; my ($key) = @_; if (ref(\$key) eq 'SCALAR') { my ($cache_hit, $value) = $self->check({ -cache_key => $key }); return $value; } else { my ($cache_hit,$value) = $self->check({ -key => $key }); return $value; } } ####################################################################### sub DELETE { my $self = shift; my ($key) = @_; if (ref(\$key) eq 'SCALAR') { $self->delete({ -cache_key => $key }); } else { $self->delete({ -key => $key }); } } ####################################################################### sub CLEAR { my $self = shift; $self->clear; } ####################################################################### sub EXISTS { my $self = shift; my ($key) = @_; if (ref(\$key) eq 'SCALAR') { my ($cache_hit,$value) = $self->check({ -cache_key => $key }); return $cache_hit; } else { my ($cache_hit,$value) = $self->check({ -key => $key }); return $cache_hit; } } ####################################################################### # # Iteration over the cache is not supported # sub FIRSTKEY { undef; } ####################################################################### # # Iteration over the cache is not supported # sub NEXTKEY { undef; } ####################################################################### # # We return the number of cache entries in a scalar context # sub SCALAR { my $self = shift; return $self->number_of_entries; } ####################################################################### ####################################################################### =head1 COPYRIGHT Copyright 1999, Benjamin Franz () and FreeRun Technologies, Inc. (). All Rights Reserved. =head1 VERSION 1.05 released 2005.09.14 =head1 LICENSE This software may be copied or redistributed under the same terms as Perl itelf. This means that you can, at your option, redistribute it and/or modify it under either the terms the GNU Public License (GPL) version 1 or later, or under the Perl Artistic License. See http://dev.perl.org/licenses/ =head1 DISCLAIMER THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Use of this software in any way or in any form, source or binary, is not allowed in any country which prohibits disclaimers of any implied warranties of merchantability or fitness for a particular purpose or any disclaimers of a similar nature. IN NO EVENT SHALL I BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION (INCLUDING, BUT NOT LIMITED TO, LOST PROFITS) EVEN IF I HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE =head1 AUTHOR Benjamin Franz =head1 TODO Nothing. =cut 1;