#! /usr/bin/perl -w ## babble - a simple Babble frontend script ## Copyright (C) 2004 Gergely Nagy ## ## This file is part of Babble. ## ## Babble is free software; you can redistribute it and/or modify it ## under the terms of the GNU General Public License as published by ## the Free Software Foundation; version 2 dated June, 1991. ## ## Babble is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ## for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program; if not, write to the Free Software ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA use strict; use Config::IniFiles; use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); use Pod::Usage; use Date::Manip; use Text::Iconv; use Babble; use Babble::Utils; use Babble::Processors::Extra; use Babble::Encode; my $cfgfile = "-"; my $verbose = 0; my $man = 0; my $help = 0; my $version = 0; GetOptions ( 'verbose|v+' => \$verbose, 'debug|d' => sub { $verbose = 3; }, 'help|?' => \$help, 'version|V' => \$version, 'man' => \$man ) or pod2usage (2); pod2usage (-exitstatus => 0, -verbose => 1) if $help; pod2usage (-exitstatus => 0, -verbose => 2) if $man; if ($version) { print "babble (Babble " . $Babble::VERSION . ")\n"; exit 0; } if ($verbose >= 3) { use Carp; $SIG{__WARN__} = \&carp; $SIG{__DIE__} = \&confess; } $| = 1; $cfgfile = $ARGV[0] if $ARGV[0]; print "Reading configuration from " . $cfgfile . " ...\n" if ($verbose >= 2); my $cfg; if ($cfgfile eq '-') { $cfg = Config::IniFiles->new (-file => \*STDIN); } else { $cfg = Config::IniFiles->new (-file => $cfgfile); } my @feeds = (); my @themes = (); my @cbs = (); my %babble_config = (); foreach my $param ($cfg->Parameters ('Babble')) { my $key = $param; my $val = $cfg->val ('Babble', $param); if ($param =~ /^\&(.*)/) { $key = $1; $val = eval "$val"; } $babble_config{$key} = $val; } push (@cbs, \&cb_collect_start) if ($verbose >= 2); my $babble = Babble->new ( -cache => { -cache_fn => $babble_config{cache_fn} || $babble_config{cache_db}, -cache_format => $babble_config{cache_format}, }, -callbacks_collect_start => \@cbs, -processors => [ \&Babble::Processors::Extra::creator_map, \&Babble::Processors::Extra::parent_map ], ); print "Recording sources ...\n" if ($verbose >= 2); foreach my $feed (grep (!/^(theme:|babble$)/i, $cfg->Sections)) { my %feed_params = ( -location => $feed, -cache_parsed => $babble_config{cache_parsed}, -cache_only => $babble_config{cache_only}, -preprocessors => [ \&utficate, \&htmlfix ], -id => $cfg->val ($feed, 'name') || $feed, ); my $type = $cfg->val ($feed, 'type') || "RSS"; print ' Recording source `' . $feed . '\' (type ' . $type . ") ...\n" if ($verbose >= 3); eval "use Babble::DataSource::$type;"; if ($@) { $type = uc ($type); eval "use Babble::DataSource::$type;"; die $@ if $@; } foreach my $param (grep (!/^(name|type)/, $cfg->Parameters ($feed))) { my $key = '-' . $param; my $val = $cfg->val ($feed, $param); if ($param =~ /^\&(.*)/) { $key = '-' . $1; $val = eval "$val"; } $feed_params{$key} = $val; } push (@feeds, "Babble::DataSource::$type"->new (%feed_params)); } $babble->add_params (%babble_config); $babble->add_sources (@feeds); print "Collecting sources ...\n" if ($verbose >= 2); $babble->collect_feeds (); print "Caching ...\n" if ($verbose >= 2); $babble->cache ( -cache_fields => [ 'id', 'date' ] ); print "Limiting ...\n" if ($verbose >= 2); my (@collection, $limit); $limit = $babble_config{days_per_page}; if ($limit) { @collection = sort { $b->date_iso cmp $a->date_iso } $babble->search ([{ field => 'date', pattern => ParseDate ( DateCalc ('today', '-' . $limit . 'days')), cmp => sub { my ($a, $b) = @_; return (Date_Cmp ($a, $b) > 0); }, }]); } @collection = $babble->sort () unless @collection; $limit = $babble_config{items_per_page} || $babble_config{limit}; delete @collection[$limit..$#collection] if $limit; $babble->add_params (babble_collection => \@collection); foreach my $theme (split (/ /, $babble_config{themes})) { my %theme_config = (); foreach my $param ($cfg->Parameters ("theme:" . $theme)) { my $key = $param; my $val = $cfg->val ('theme:' . $theme, $param); if ($param =~ /^\&(.*)/) { $key = $param; $val = eval "$val"; } $theme_config{$key} = $val; } if ($theme_config{theme}) { $theme_config{-theme} = $theme_config{theme}; } else { $theme_config{-theme} = $theme; } foreach my $format (split (/ /, $theme_config{formats})) { my $file; if ($theme_config{output}) { $file = $theme_config{output}; } else { $file = "index." . $format; } print "Generating " . $babble_config{output_dir} . "/" . $file . " ...\n" if $verbose; $theme_config{-format} = $format; open (OUTF, ">" . $babble_config{output_dir} . "/" . $file); binmode OUTF, ":raw"; print OUTF $babble->output (%theme_config); close (OUTF); } } sub utficate { my $text = shift; my $source_encoding = 'utf-8'; my ($c, $tmp); if ($$text =~ /<\?.*encoding=[\'"](.*?)[\'"].*\?>/) { # The above regexp comes from Mark Pilgrim's # UltraLiberalFeedParser. $source_encoding = $1; $$text =~ s/(<\?.*encoding=[\'"]) (.*?) ([\'"].*\?>)/${1}utf-8${3}/x; } $c = Text::Iconv->new ($source_encoding, 'utf-8'); $tmp = $c->convert ($$text); return ($$text = $tmp) if $tmp; # Right, so the conversion did not go well. Let's go hardcore. foreach my $encoding ('iso-8859-2', 'iso-8859-1', 'windows-1250') { $c = Text::Iconv->new ($encoding, 'utf-8'); $tmp = $c->convert ($$text); return ($$text = $tmp) if $tmp; } # We are still here? OH GOD! This must be some broken feed, so # lets carp a little. carp "Feed with broken encoding encountered."; # Then strip all the unsafe chars $$text =~ s/[^[:ascii:]]/./; $$text = $c->convert ($$text); } sub htmlfix { my $text = shift; return unless $$text; $$text =~ s/ /\ /g; $$text =~ s/€/\€/g; $$text =~ s/\&((?!(\w+|#\d+);))/\&$1/g; $$text =~ s/\<([^\s\>\@]*\@[^\>\s]*)\>/\<$1\>/g; } sub cb_collect_start { my $source = shift; print ' Collecting: `' . $$source->{-id} . '\' from ' . $$source->{-location} . "\n"; } =pod =head1 NAME babble - Simplistic Babble frontend =head1 SYNOPSIS babble [options] [configfile] =head1 DESCRIPTION The babble script is a simple wrapper around the core functionality of Babble. It can read a config file, which defines an aggregation and a set of output templates. Using this information, the script will generate static output. =head1 OPTIONS =over 4 =item --debug Enable debugging output, slightly more verbose than B<--verbose>. =item --help Display a help message and exit. =item --verbose Enable verbose mode, printing the current state of processing. =item --version Print version information and exit. =back =head1 CONFIG FILE SYNTAX The configuration file syntax is pretty easy and straightforward. There are a few special sections, and so-called feed sections. The first and most important section is I, which describes the current babble's basic properties. The script recognises the I and I variables under this section. All other variables will be used as parameters in the created Babble object. Then, there are the so-called theme descriptor sections. They all begin with a I prefix, and the rest is one of the items in the I section's I variable. That is, if the configration file says something like this: [Babble] themes = html rss20 Then the two recognised theme descriptor sections are I and I. Each of these sections can contain a I variable (defaults to the name of the theme, ie I and I in our example), which specifies the theme to use; I, which selects the formats to use (some themes provide multiple output formats); and I, the name of the output file. =head1 EXAMPLE [Babble] themes = html rss20 output_dir = output cache_db = feed_cache.db [theme:html] theme = planet_gray output = index.html [theme:rss20] theme = XML output = rss20.xml =head1 AUTHOR Gergely Nagy, algernon@bonehunter.rulez.org Bugs should be reported at L. =head1 SEE ALSO Babble, babble-cache =cut # arch-tag: 6ab4db9f-fab6-4947-a24d-34cab756e1d3