package AsciiDB::TagFile; # Copyright (c) 1997-2001 Jose A. Rodriguez. All rights reserved. # This program is free software; you can redistribute it and/or modify it # under the same terms as Perl itself. require Tie::Hash; @ISA = (Tie::Hash); use Cwd; use vars qw($VERSION $catPathFileName); BEGIN { eval "use File::Spec"; $catPathFileName = ($@) ? sub { join('/', @_) } : # Unix way sub { File::Spec->catfile(@_) }; # Portable way } $VERSION = '1.06'; use Carp; use AsciiDB::TagRecord; sub TIEHASH { my $class = shift; my %params = @_; my $self = {}; $self->{_DIRECTORY} = $params{DIRECTORY} || cwd; $self->{_SUFIX} = $params{SUFIX} || ''; $self->{_SCHEMA} = $params{SCHEMA}; $self->{_READONLY} = $params{READONLY}; $self->{_FILEMODE} = $params{FILEMODE}; $self->{_LOCK} = $params{LOCK} || 0; $self->{_CACHESIZE} = $params{CACHESIZE}; unless (-d $self->{_DIRECTORY}) { croak "Directory '$params{DIRECTORY}' does not exist"; } if (defined $self->{_CACHESIZE}) { $self->{_CACHESIZE} = int($self->{_CACHESIZE}); if ($self->{_CACHESIZE} == 0) { undef $self->{_CACHESIZE}; } elsif ($self->{_CACHESIZE} < 1) { croak "Cache size should be >= 0 (0 means no cache)"; $self->{_CACHESIZE} = 1; } } # Number of internal keys (ie. '_key') $self->{_INTKEYCOUNT} = 1 + keys %$self; bless $self, $class; } sub FETCH { my ($self, $key) = @_; return $self->{$key} if exists ($self->{$key}); return $self->newRecord($key); } sub STORE { my ($self, $key, $value) = @_; # Return if the user is assigning an object to itself ($a{A} = $a{A}) return if exists $self->{$key} && $self->{$key} == $value; $self->newRecord($key) unless (exists $self->{$key}); my $field; foreach $field (keys %{$self->{$key}}) { $self->{$key}{$field} = $value->{$field}; } } sub FIRSTKEY { my $self = shift; # Current keys are the union of saved keys and new created but # not saved keys my %currentKeys; my $sufix = $self->{_SUFIX}; map { $currentKeys{$_} = 1 } map { $self->decodeKey($_) } grep { $_ =~ /(.+)\Q$sufix\E$/; $_ = $1 } $self->getDirFiles(); map { $currentKeys{$_} = 1 } grep(!/^_/, keys %$self); my @currentKeys = keys %currentKeys; $self->{_ITERATOR} = \@currentKeys; shift @{$self->{_ITERATOR}}; } sub NEXTKEY { my $self = shift; shift @{$self->{_ITERATOR}}; } sub EXISTS { my ($self, $key) = @_; $self->{$key} || -f $self->fileName($key) || 0; } sub DELETE { my ($self, $key) = @_; return if $self->{_READONLY}; unlink $self->fileName($key); tied(%{$self->{$key}})->deleteRecord() if tied(%{$self->{$key}}); delete $self->{$key} if exists $self->{$key}; } sub sync { my $self = shift; my @recordsToSync = grep { ref $_ && ref($_) eq 'HASH' && tied(%$_) } values %{$self}; my $record; foreach $record (@recordsToSync) { tied(%$record)->sync(); } } sub purge { my $self = shift; my ($cacheSize) = @_; if (defined($cacheSize)) { my $dataRecords = scalar(keys %$self) - $self->{_INTKEYCOUNT}; return if $dataRecords < $cacheSize; } # This works in 5.004 no 5.003 #delete @$self{grep !/^_/, keys %{$self}}; # instead we use this... foreach (grep !/^_/, keys %{$self}) { delete $self->{$_}; } } sub newRecord { my $self = shift; my ($key) = @_; $self->purge($self->{_CACHESIZE}) if defined($self->{_CACHESIZE}); my %record; tie %record, 'AsciiDB::TagRecord', FILENAME => $self->fileName($key), SCHEMA => $self->{_SCHEMA}, READONLY => $self->{_READONLY}, FILEMODE => $self->{_FILEMODE}; $self->{$key} = \%record; } sub encodeKey { my $self = shift; my ($key) = @_; my $encodeSub = $self->{_SCHEMA}{KEY}{ENCODE}; ($encodeSub) ? &$encodeSub($key) : $key; } sub decodeKey { my $self = shift; my ($key) = @_; my $decodeSub = $self->{_SCHEMA}{KEY}{DECODE}; ($decodeSub) ? &$decodeSub($key) : $key; } sub fileName { my $self = shift; my ($key) = $self->encodeKey(@_); &$catPathFileName($$self{_DIRECTORY}, "$key$$self{_SUFIX}") } sub getDirFiles { my $self = shift; local *DIR; opendir(DIR, $$self{_DIRECTORY}) || die "Can't opendir $$self{_DIRECTORY}: $!"; my @files = grep { -f &$catPathFileName($$self{_DIRECTORY}, $_) } readdir(DIR); closedir DIR; @files; } 1; __END__ =head1 NAME AsciiDB::TagFile - Tie class for a simple ASCII database =head1 SYNOPSIS # Bind the hash to the class $tieObj = tie %hash, 'AsciiDB::TagFile', DIRECTORY => $directory, SUFIX => $sufix, LOCK => $bool, READONLY => $bool, CACHESIZE => $cacheSize, FILEMODE => $mode, SCHEMA => { ORDER => $arrayRef KEY => { ENCODE => $subRef, DECODE => $subRef } }; # Save to disk all changed records $tieObj->sync(); # Remove all records from memory (and save them if needed) $tieObj->purge(); # Remove all records from memory (and save them if needed) # iif there are more than $cacheSize records in memory $tieObj->purge($cacheSize); # Get all record keys @array = keys %hash; # Check if a record exists exists $hash{$recordKey} # Get a field $scalar = $hash{$recordKey}{$fieldName}; # Assign to a field $hash{$recordKey}{$fieldName} = $value; =head1 DESCRIPTION The B provides a hash-table-like interface to a simple ASCII database. The ASCII database stores each record in into a file: $directory/recordKey1$sufix $directory/recordKey2$sufix ... $directory/recordKeyI$sufix And a record has this format: [fieldName1]: value1 [fieldName2]: value2 ... [fieldNameI]: value2 After you've tied the hash you can access this database as access a hash of hashes: $hash{recordKey1}{fieldName1} = ... To bind the %hash to the class AsciiDB::TagFile you have to use the tie function: tie %hash, 'AsciiDB::TagFile', PARAM1 => $param1, ...; The parameters are: =over 4 =item DIRECTORY The directory where the records will be stored or readed from. The default value is the current directory. =item SUFIX The records are stored as files. The file name of a record is the key plus this sufix (if supplied). For example, if the record with key 'josear' and sufix '.record', will be stored into file: 'josear.record'. If this parameter is not supplied the records won't have a sufix. =item LOCK If you set this parameter to 1 TagFile will perform basic locking. Record files will be share locked before reading them, and exclusive locked when syncing (writing) them. This basic locking only guarantees that a record file is always written correctly, but as TagFile keep records in memory you can still suffer consistency problems reading fields. The default value is 0, i.e. the database won't be locked. =item READONLY If you set this parameter to 1 the database will be read only and all changes will be discarted. The default value is 0, i.e. the database can be changed. =item CACHESIZE Records loaded from disk (or simply created) are keeped in memory till the tied hash is deleted. You can limit the number of records in memory setting this option to a value ($cacheSize). All records are purged from memory if their count reach $cacheSize. You can purge the records manually using the purge() method. Of course, the $caseSize should be a positive number, and you can use the 0 value to turn off the caching (useful when testing). The default value for CACHESIZE is 'infinite' (more or less...) =item FILEMODE Filemode assigned to new created files. If this parameter is not supplied the new created files will have the default permissions. =item SCHEMA This parameter is a hash reference that contains the database definition. With ORDER you can specify in which order fields will be saved into the file. For example, SCHEMA => { ORDER => [ 'fieldHi', 'field2There', 'fieldWorld' ] } will save the record this way: [fieldHi]: ... [fieldThere]: ... [fieldWorld]: ... NOTE: this parameter is MANDATORY, and you have to specify all the fields. B. With KEY,ENCODE and KEY,DECODE you can define an special encoding for keys when used as filenames. For example, if using this SCHEMA: SCHEMA => { ORDER => ['a', 'b', 'c'], KEY => { ENCODE => sub { $_[0] =~ s{/}{_SLASH_}g; $_[0] }, DECODE => sub { $_[0] =~ s{_SLASH_}{/}g; $_[0] }, } } a record with the key 's1/s2' will be saved into filename 's1_SLASH_s2'. The DECODE subroutine is used to traslate back to the original key. NOTE: You should use this feature if you allow filesystem metacharacters (as '/', used in Unix to split path components) in your keys. =back The data will be saved to disk when the hash is destroyed (and garbage collected by perl), so if you need for safety to write the updated data you can call the B method to do it. =head1 EXAMPLES $dbObj = tie %tietag, 'AsciiDB::TagFile', DIRECTORY => 'data', SUFIX => '.tfr', FILEMODE => 0644, SCHEMA => { ORDER => ['name', 'address'] }; $tietag{'jose'}{'name'} = 'Jose A. Rodriguez'; $tietag{'jose'}{'address'} = 'Granollers, Barcelona, SPAIN'; $tietag{'cindy'}{'name'} = 'Cindy Crawford'; $tietag{'cindy'}{'address'} = 'YouBetIwouldLikeToKnowIt'; my $key; foreach $key (keys %tietag) { print $tietag{$key}{'name'}, "\t", $tietag{$key}{'address'}, "\n"; }