## Babble/DataSource/FlatFile.pm
## Copyright (C) 2004 Gergely Nagy
##
## This file is part of Babble.
##
## Babble is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; version 2 dated June, 1991.
##
## Babble is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
## for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package Babble::DataSource::FlatFile;

use strict;
use Carp;

use Babble::Encode;
use Babble::Document;
use Babble::Document::Collection;
use Babble::DataSource;

use File::Find;
use File::stat;
use File::Basename;
use Date::Manip;
use User::pwent;

use vars qw(@ISA);
@ISA = qw(Babble::DataSource);

=pod

=head1 NAME

Babble::DataSource::FlatFile - Flat file source fetcher for Babble

=head1 SYNOPSIS

 use Babble;
 use Babble::DataSource::FlatFile;

 my $babble = Babble->new ();
 $babble->add_sources (
     Babble::DataSource::FlatFile->new (
         -location       => "/home/me/blog/data",
         -extension      => '\.blog',
         -permalink_base => 'http://example.org/~me/blog/'
     )
 );
 ...

=head1 DESCRIPTION

Babble::DataSource::FlatFile implements a Babble data source class
that fetches documents directly from the filesystem.

=cut

# Default permalink generator.  Receives four scalar references (base
# URL, full file path, date string, extension regexp) and returns
# "<base>YYYY/MM/DD/#<file name without extension>".
my $permalink_blosxom = sub {
    my ($base_ref, $path_ref, $date_ref, $ext_ref) = @_;

    # The anchor is the bare file name with the extension pattern
    # removed from its end.
    my $anchor = basename (${$path_ref});
    $anchor =~ s/${$ext_ref}$//g;

    # Date::Manip turns the date string into the dated path component.
    my $dated_path = UnixDate (ParseDate (${$date_ref}), "%Y/%m/%d/#");

    return ${$base_ref} . $dated_path . $anchor;
};

=head1 METHODS

=over 4

=item new (%params)

This method creates a new object.

=pod

The recognised arguments are B<-location>, which specifies the
directory where documents should be collected from; B<-extension>, a
regexp that will be used to find out which files are documents, and
which are not (defaults to C<\.txt>); B<-permalink_base>, the base URL
for the collection (used by the permanent link generator, see later);
and B<-permalink_gen>, a code reference that is used to generate links
to documents.

The method specified in B<-permalink_gen> takes four arguments:
B<base>, B<file>, B<date> and B<ext>. All of them are string scalar
references. Base is what we specified using B<-permalink_base>, file
is the full path to the filename we're currently operating on, date is
its submission date, and ext is its extension.

=cut

sub new {
    my $type = shift;
    my $class = ref ($type) || $type;
    my $self = $class->SUPER::new (@_);

    # Single quotes keep the backslash, so the default pattern really is
    # a literal dot followed by "txt" (a double-quoted "\.txt" collapses
    # to ".txt", where the dot matches any character).
    $self->{-extension} = '\.txt' unless $self->{-extension};

    # $permalink_blosxom already holds a code reference; store it as is.
    $self->{-permalink_gen} = $permalink_blosxom
        unless $self->{-permalink_gen};

    # Bless into the resolved class name: $type may be an existing
    # object when new() is invoked as an instance method.
    return bless $self, $class;
}

=pod

=item collect ()

This function finds all the files in the data directory (recursively)
which have the specified extension, and makes a B<Babble::Document>
out of them.

The title is the first line of the file, the date is its modification
time, subject is the subdirectory under which it was found (or
C<main>, if it was in the top-level directory), author is the user
owning the file, content is the file content, save the first line.

Its id property contains a pointer to the entry (eg, to one's
weblog). This is generated by the B<$source-E<gt>{-permalink_gen}>
function, explained above.

For the Babble::Document::Collection object to return, some
information will be gathered from the Babble object which calls this
method, or from the parameters passed to us. Namely, the
B<meta_title>, B<meta_desc>, B<meta_link>, B<meta_owner_email>,
B<meta_subject>, B<meta_feed_link>, B<meta_owner> and B<meta_image>
keys will be used, if present.

=cut

# collect ($babble)
#
# Walks the data directory (-location, or -data_dir as a fallback),
# picks every regular file whose name matches the -extension regexp and
# turns each one into a Babble::Document.  Returns a
# Babble::Document::Collection holding those documents.
#
# $babble is a reference to the calling Babble object; its Params hash
# supplies defaults for the meta_* keys not set on the source itself.
#
# Files that cannot be stat'ed or opened are skipped with a warning.
sub collect {
    my ($self, $babble) = @_;
    my @files = ();
    my $collection;
    my %args;

    # Effective root directory, used both for the search and for
    # deriving each document's subject.
    my $root = $self->{-location} || $self->{-data_dir};

    foreach my $key ("meta_title", "meta_desc", "meta_link",
                     "meta_owner_email", "meta_subject",
                     "meta_feed_link", "meta_owner", "meta_image") {
        $args{$key} = $self->{$key} || $$babble->{Params}->{$key} || "";
        $args{$key} = to_utf8 ($args{$key});
    }

    # File::Find sets $_ to the bare file name inside the directory it
    # has changed into, so the -f test works on $_ directly.  Only
    # regular files are collected; a directory that happens to match
    # the extension is ignored.
    find ({wanted => sub {
               push (@files, $File::Find::name)
                   if (/$self->{-extension}$/ && -f $_);
           }}, $root);

    $collection = Babble::Document::Collection->new (
        title => $args{meta_title},
        link => $args{meta_feed_link},
        id => $args{meta_link},
        author => $args{meta_owner} || $args{meta_owner_email},
        content => $args{meta_desc},
        date => ParseDate ("today"),
        subject => $args{meta_subject},
        name => to_utf8 ($self->{-id}) || $args{meta_owner} ||
            $args{meta_owner_email} || $args{meta_title},
        image => $args{meta_image},
    );

    # Prefix to strip from directory names when deriving the subject.
    # Trailing slashes are dropped (unless the root is "/" itself) so
    # that files directly under the root map to "main".
    my $prefix = defined ($root) ? $root : "";
    $prefix =~ s{/+$}{} unless $prefix =~ m{^/+$};

    foreach my $file (@files) {
        # One stat call serves both the modification time and the owner.
        my $st = stat ($file);
        my $fh;
        unless ($st && open ($fh, "<", $file)) {
            carp ("Babble::DataSource::FlatFile: cannot read $file: $!");
            next;
        }

        # First line is the title, everything after it is the content.
        my $title = <$fh>;
        my @content = <$fh>;
        close ($fh);

        # An empty file has no first line.
        $title = "" unless defined ($title);
        chomp ($title);

        # Subject is the directory relative to the root, or "main" for
        # files sitting directly in the root.  \Q...\E keeps regexp
        # metacharacters in the path from being interpreted.
        my $subject = dirname ($file);
        $subject =~ s/^\Q$prefix\E/./;
        $subject =~ s{^\./?}{};
        $subject = "main" unless $subject;

        my $date = gmtime ($st->mtime);
        my $link = $self->{-permalink_gen}->(\$self->{-permalink_base},
                                             \$file, \$date,
                                             \$self->{-extension});

        # getpwuid yields nothing when the owning uid has no passwd
        # entry; fall back to the numeric uid instead of dying.
        my $owner = getpwuid ($st->uid);
        my $author = $owner ? $owner->name : $st->uid;

        my $doc = Babble::Document->new (
            title => to_utf8 ($title),
            id => $link,
            content => to_utf8 (join ("", @content)),
            subject => to_utf8 ($subject),
            date => ParseDate ($date),
            author => to_utf8 ($author),
        );

        push (@{$collection->{documents}}, $doc);
    }

    return $collection;
}

=pod

=back

=head1 AUTHOR

Gergely Nagy, algernon@bonehunter.rulez.org

Bugs should be reported to the author.

=head1 SEE ALSO

Babble::Document(3pm), Babble::Document::Collection(3pm),
Babble::DataSource(3pm)

=cut

1;

# arch-tag: 13b638e3-a4e5-4b81-a924-dc931cd25ded