package BerkeleyDB::Lite ; # Copyright (c) 1997-2001 Jim Schueler. All rights reserved. # This program is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. # # The documentation for this module is at the bottom of this file, # after the line __END__. use BerkeleyDB ; use Storable qw( nfreeze thaw ) ; use Carp ; use 5.006; use strict; use warnings; require Exporter; our $VERSION = '1.1'; ## See Changes file our @defaults = ( rootdir => "/usr/local/apache/cgi-bin/db" ) ; our ( %env, %dbreg ) ; our @ISA = qw( Exporter ); # Items to export into callers namespace by default. Note: do not export # names by default without a very good reason. Use EXPORT_OK instead. # Do not simply export all your public functions/methods/constants. # This allows declaration use BerkeleyDB::Lite::Hash ':all'; # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK # will save memory. our %EXPORT_TAGS = ( 'all' => [ qw() ] ); our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); our @EXPORT = qw( duplicatekeys incrementkeys uniquepairs uniquekeys ); ## Problems have occured during global destruction if Env objects are ## closed before database objects. %dbreg closes all open databases ## first. END { closeall() ; } ## Run closeall if forking child processes. All references must be destroyed ## first. sub closeall { map { eval { $_->db_close } } values %dbreg ; %env = () ; } sub new { my $invocator = shift ; my $class = ref $invocator || $invocator ; my $self = $class->scalars( @_ ) ; my $ref = tied %$self ; return undef unless $ref ; $ref->filter_store_value ( sub { $_ = nfreeze( ref $_? $_: \$_ ) ; } ) ; $ref->filter_fetch_value ( sub { $_ = thaw $_ ; } ) ; return bless $self, $class ; } sub uniquekeys { return 1 ; } sub duplicatekeys { return property => DB_DUP ; } sub uniquepairs { return property => DB_DUP | DB_DUPSORT ; } sub incrementkeys { return compare => sub { ( $_[0] || 0 ) <=> ( $_[1] || 0 ) } ; } sub envsetup { my %config = ( @defaults, @_ ) ; croak "'home' not defined" unless $config{home} ; croak "'filename' not defined" unless $config{filename} ; ### Environment ### my %args_env = () ; $args_env{'-Cachesize'} = $config{cachesize} if exists $config{cachesize} ; if ( exists $config{server} ) { $args_env{'-Server'} = $config{server} ; $args_env{'-Home'} = $config{home} ; } else { $args_env{'-Home'} = "$config{rootdir}/$config{home}" ; } my $flags = DB_CREATE | DB_INIT_MPOOL | DB_INIT_CDB ; $flags ||= DB_RECOVER if $config{recover} ; $env{ $config{home} } ||= new BerkeleyDB::Env %args_env, -Flags => $flags, or warn "$!" ; ### Database ### my %args_db = () ; $args_db{filename} = "$config{rootdir}/$config{home}/$config{filename}" unless $config{server} ; $args_db{'-Env'} = $env{ $config{home} } ; $args_db{'-Filename'} = $config{filename} ; $args_db{'-Property'} = $config{property} if exists $config{property} ; $args_db{'-Compare'} = $config{compare} if exists $config{compare} ; return %args_db ; } ## intended for duplicatekeys sub recordset { my $ref = shift ; my $self = tied %$ref ; my $key = shift ; my $value = "" ; my @values = () ; # database locked my $cursor = $self->db_cursor ; if ( $cursor->c_get( $key, $value, DB_SET ) ) { $cursor = undef ; return @values ; } push @values, $value ; while ( ! $cursor->c_get( $key, $value, DB_NEXT_DUP ) ) { push @values, $value ; } $cursor = undef ; return @values ; } ## experimental to improve durability sub sync { my $ref = shift ; my $self = tied %$ref ; $self->db_sync ; } sub syncall { map { $_->db_sync } values %dbreg ; } ## intended for duplicatekeys sub delete { my $ref = shift ; my $self = tied %$ref ; my $key = shift ; my $value = shift ; my $orig = $value ; my $cursor = $self->db_cursor( DB_WRITECURSOR ) ; my $status = $cursor->c_get( $key, $value, DB_GET_BOTH ) ; ## Warning: Ensure consistency between numbers with strings. ## See Storable documentation. $cursor->c_del unless $status ; $cursor = undef ; return $status ; } sub DESTROY { my $ref = shift ; my $self = tied %$ref ; return unless $self ; delete $dbreg{ $self->[0] } ; ## Sleepycat suggests that as an RPC client, db_close is a noop. ## In that case, db_sync ought to be called instead. ## How to check whether db's env uses RPC? eval { $self->db_sync } ; eval { $self->db_close } ; } package BerkeleyDB::Lite::Hash ; use BerkeleyDB ; use Carp ; our @ISA = qw( BerkeleyDB::Lite ) ; sub scalars { my $invocator = shift ; my $class = ref $invocator || $invocator ; my( $self, %self ) ; my %env = BerkeleyDB::Lite::envsetup( @_ ) ; my %alt = @_ ; my $filename = $env{filename} ; delete $env{filename} ; ### Table ### $self = tie %self, $alt{subclass} || 'BerkeleyDB::Hash', %env, -Flags => DB_CREATE, or warn "($$) $filename: $!" if $env{'-Env'} ; ## Failsafe- open R/O without locking if ( $filename && ! $self ) { delete $env{ '-Filename' } ; delete $env{ '-Env' } ; $self = tie %self, 'BerkeleyDB::Hash', -Filename => $filename, -Flags => DB_RDONLY, %env, or warn "$filename: $! (readonly)" ; } $dbreg{ $self->[0] } = $self if ref $self ; return bless \%self, $class ; } package BerkeleyDB::Lite::Btree ; use BerkeleyDB ; use Carp ; our @ISA = qw( BerkeleyDB::Lite ) ; sub lexical { my $invocator = shift ; my $class = ref $invocator || $invocator ; return BerkeleyDB::Lite::Btree::Lexical->new( @_ ) ; } sub scalars { my $invocator = shift ; my $class = ref $invocator || $invocator ; my( $self, %self ) ; my %env = BerkeleyDB::Lite::envsetup( @_ ) ; my %alt = @_ ; my $filename = $env{filename} ; delete $env{filename} ; ### Table ### $self = tie %self, $alt{subclass} || 'BerkeleyDB::Btree', %env, -Flags => DB_CREATE, or warn "$filename $!" if $env{'-Env'} ; ## Failsafe- open R/O without locking if ( $filename && ! $self ) { delete $env{ '-Filename' } ; delete $env{ '-Env' } ; $self = tie %self, 'BerkeleyDB::Btree', -Filename => $filename, -Flags => DB_RDONLY, %env, or warn "$filename $!" ; } $dbreg{ $self->[0] } = $self if ref $self ; return bless \%self, $class ; } sub dosearch { my $ref = shift ; my $self = tied %$ref ; my $partkey = shift ; my $isunique = shift ; my %unique = () ; my @keys = () ; my @values = () ; my @each = () ; return [] unless $partkey ; my $length = length $partkey ; # database locked my $cursor = $self->db_cursor ; my $value = 0 ; my $key = $partkey ; my $status = $cursor->c_get( $key, $value, DB_SET_RANGE ) ; while ( $key ) { last if $status || substr( $key, 0, $length ) ne $partkey ; if ( $isunique ) { $unique{ $key }++ ; } else { push @each, [ $key, $value ] ; } $status = $cursor->c_get( $key, $value, DB_NEXT ) ; } $cursor = undef ; @each = map { [ $_, $unique{$_} ] } keys %unique if $isunique ; return \@each ; } sub matchingkeys { return map { $_->[0] } @{ dosearch( @_ ) } ; } sub matchingvalues { return map { $_->[1] } @{ dosearch( @_ ) } ; } sub searchset { return map { @$_ } @{ dosearch( @_ ) } ; } ## intended for Btree's with incremented keys sub nextrecord { my $ref = shift ; my $self = tied %$ref ; my $key = 0 ; my $value = 0 ; my $cursor = $self->db_cursor() ; $cursor->c_get( $key, $value, DB_LAST ) ; $ref->{ $key +1 } = {} ; $cursor = undef ; return $key +1 ; } package BerkeleyDB::Lite::Btree::Lexical ; our @ISA = qw( BerkeleyDB::Lite::Btree ) ; sub scalars { my $invocator = shift ; my $class = ref $invocator || $invocator ; my $self = BerkeleyDB::Lite::Btree->scalars( @_ ) ; my $ref = tied %$self ; return undef unless $ref ; $ref->filter_store_key ( sub { $_ = sprintf "%010d", $_ ; } ) ; $ref->filter_fetch_key ( sub { $_ = sprintf "%d", $_ ; } ) ; return bless $self, $class ; } 1; __END__ # Below is stub documentation for your module. You better edit it! =head1 NAME BerkeleyDB::Lite - Simplified Interface to BerkeleyDB =head1 SYNOPSIS use BerkeleyDB::Lite; =head2 ## Example 1 ## Create a Hashed database my $db = new BerkeleyDB::Lite::Hash home => 'zoo', filename => 'residents' ; $db->{Samson} = new Primate ; $db->{Cornelius} = new Primate ; $db->{Kaa} = new Reptile ; =head2 ## Example 2 ## Create a Btree database allowing duplicates and scalar values my $types = scalars BerkeleyDB::Lite::Btree home => 'zoo', filename => 'types', &duplicatekeys ; $types->{primate} = 'Samson' ; $types->{primate} = 'Cornelius' ; $types->{reptile} = 'Kaa' ; printf "%s\n", join ' ', $types->recordset{primate} ; ## prints: Samson Cornelius $types->delete( primate => 'Samson' ) ; printf "%s\n", join ' ', $types->recordset{primate} ; ## prints: Cornelius =head2 ## Example 3 ## Create a database of visitors ## Use a table with arbitrary keys ## Track visitors by date/timestamp $tickets = new BerkeleyDB::Lite::Btree home => 'zoo', filename => 'tickets', &incrementkeys ; ## Lexical Alternative # $tickets = lexical BerkeleyDB::Lite::Btree # home => 'zoo', # filename => 'tickets' ; $bytime = scalars BerkeleyDB::Lite::Btree home => 'zoo', filename => 'ticketsbytime', &duplicatekeys ; ## Process a new visitor in real time sub newvisitor { my $serial = $tickets->nextrecord() ; my $date = getdate() ; ## not part of BerkeleyDB::Lite my $time = gettime() ; ## not part of BerkeleyDB::Lite $tickets->{$serial} = { @_ } ; $bytime->{ "$date $time" } = $serial ; return $serial ; } ## Get a list of visitors on a certain date sub showvisitorsbydate { my $date = shift ; return $bytime->matchingvalues( $date ) ; } =head1 DESCRIPTION BerkeleyDB::Lite is an interface to Paul Marquess's BerkeleyDB that provides simplified constructors, tied access to data, and methods for returning multiple record sets. =head2 Example 1 BerkeleyDB::Lite maintains BerkeleyDB environment references in a package variable hash keyed on the B argument. The basic BerkeleyDB::Lite constructor arguments define the BerkeleyDB environment and database. When the constructor is called, a previously opened environment is used if available. Otherwise, a new environment is created and is available to future constructor requests. This version of BerkeleyDB::Lite creates all environment objects as concurrent data stores. Transactional data storage is not currently integrated. By default, BerkeleyDB::Lite is designed to marshall objects into a database using the B module. Example 1 shows a simple application that illustrates both of these features. The constructor is called with the minimum arguments to identify the environment and the database. These few lines of code are sufficient to add persistent object support to an application. =head2 Example 2 One of Berkeley's most appealing features is support for duplicate keys. This feature enables a programmer to use persistent arrays, where elements can be accessed, added, and deleted without marshalling. Example 2 uses the B constructor which disables the automatic serialization of record access. Otherwise, if the B constructor is used, scalars will be returned as scalar references, regardless of how they are stored. B<&duplicatekeys> is a subroutine that returns a pair of constants as a shortcut. The constants are defined in the BerkeleyDB module. The B method returns a stored list from the database. This method is available to both BerkeleyDB::Lite::Btree and BerkeleyDB::Lite::Hash classes. The B method is used to delete an element from the list. Since BerkeleyDB::Lite adheres to the B interface, the B can normally used to remove stored objects. The B should be used on databases with duplicate keys to avoid indeterminate results. BerkeleyDB returns the status of a delete operation. This feature can be used to delete an entire list using the following idiom: while ( ! delete $types->{primate} ) {} A BerkeleyDB database configured for duplicate keys also allows duplicate key/value pairs. For most one-to-many data sets, key/value pairs should be unique. This issue has not been completely resolved. Presently, the workaround is to import a retrieved list into a hash structure: %unique = map { $_ => 1 } $types->recordset('primate') ; keys %unique ; However, care should be taken when deleting elements. The delete method for duplicate keys should almost always be invoked using an idiom similar to the one above: while ( ! $types->delete( primate => 'samson' ) ) {} Another source of problems occurs when using the B method on databases containing objects. In this case, the second argument may refer to an object that does not exactly match the stored value. The following code illustrates this difficulty: my $cats = new BerkeleyDB::Lite::Btree( home => 'zoo', filename => 'cats', &duplicatekeys, ) ; my $Felix = new BigCat dinner => 'antelope' ; $cats->{lion} = $Felix ; $Felix->{dinner} = 'gazelle' ; $cats->delete( lion => $Felix ) ; ## fails This problem also occurs because the results of the marshalling operation differ depending on whether numbers are interpreted as integers, floats, or strings. Thus an object's value may change merely as a result of its context. The following example illustrates the situation: $weight = '300 lbs.' ; $weight =~ s/\D//g ; my $Felix = new BigCat( weight => $weight ) ; ## member as string $cats->{lion} = $Felix ; $cats->delete( lion => $Felix ) ## operation fails if $Felix->{weight} > 200 ; ## member as integer =head2 Example 3 Example 3 shows a few additional features helpful to developers accustomed to relational databases. These features take advantage of the B database capabilities, and are not available to BerkeleyDB::Lite::Hash objects. The B method of BerkeleyDB::Lite::Btree returns a new unique key. Each B call creates a new blank record to avoid race conditions, and returns the new key. This method creates a key by adding 1 to the last record. In order to ensure that the last record contains the highest valued key, use the B<&incrementkeys> argument to the BerkeleyDB::Lite::Btree constructor. The B<&incrementkeys> function is a shortcut that returns a CODE constant that forces numerical Btree sorting. There is a significant disadvantage to databases created using the B<&incrementkeys> argument. The resulting databases are incompatible with SleepyCat utilities such as B and B. As an alternative, B can be called as a method from the BerkeleyDB::Lite::Btree::Lexical subclass. This subclass functions identically, but the numerical keys are stored as zero padded strings. Therefore, a restriction on B subclass databases is that keys must be numerically less than 10,000,000,000. The B constructor to the BerkeleyDB::Lite::Btree class is synonymous with the B constructor to the BerkeleyDB::Lite::Btree::Lexical subclass. BerkeleyDB::Lite also implements another nice Berkeley feature: partial string matching. The methods B, B, and B all return a set of records whose keys begin with a common substring. For example, if keys are defined with the following format: S<"2002 Jul 14 15:30">, the following data can be returned: ## All records for the year @annually = $bytime->matchingkeys('2002 ') ; ## All records for the month @monthly = $bytime->matchingvalues('2002 Jul ') ; ## All records for the day %daily = $bytime->searchset('2002 Jul 14 ') ; B returns an array of the matching records' keys. B returns an array of the matching records' values. Unforeseen confusion may result from the method name B- the returned records have matching keys, but the record values are returned. B returns the matching records as key/value pairs that can populate an associative array as shown. However, using an associative array is pointless if the database contains duplicate keys. The following code is an effective technique for capturing the results of this type of search: foreach ( $bytime->matchingkeys( '2002 Jul 14', &uniquekeys ) ) { $daily{ $_ } = [ $bytime->recordset( $_ ) ] ; } B<&uniquekeys> returns a constant that is used primarily as an argument to the B method to filter duplicate results from the database. When this argument is passed to the B<&searchset> method, the values in the key/value pairs indicate a record count. B<&uniquekeys> cannot be used with the B method. =head2 EXPORT &duplicatekeys &incrementkeys &uniquepairs &uniquekeys =head1 AUTHOR Jim Schueler, Ejschueler@tqis.comE =head1 SEE ALSO L L F =cut