package CPAN::Mini::Webserver; use App::Cache; use CPAN::Mini::App; use CPAN::Mini::Webserver::Index; use CPAN::Mini::Webserver::Templates; use List::MoreUtils qw(uniq); use Module::InstalledVersion; use Moose; use Parse::CPAN::Authors; use Parse::CPAN::Packages; use Parse::CPAN::Meta; use Pod::Simple::HTML; use Path::Class; use PPI; use PPI::HTML; use Template::Declare; Template::Declare->init( roots => ['CPAN::Mini::Webserver::Templates'] ); if ( eval { require HTTP::Server::Simple::Bonjour } ) { extends 'HTTP::Server::Simple::Bonjour', 'HTTP::Server::Simple::CGI'; } else { extends 'HTTP::Server::Simple::CGI'; } has 'hostname' => ( is => 'rw' ); has 'cgi' => ( is => 'rw', isa => 'CGI' ); has 'directory' => ( is => 'rw', isa => 'Path::Class::Dir' ); has 'scratch' => ( is => 'rw', isa => 'Path::Class::Dir' ); has 'parse_cpan_authors' => ( is => 'rw', isa => 'Parse::CPAN::Authors' ); has 'parse_cpan_packages' => ( is => 'rw', isa => 'Parse::CPAN::Packages' ); has 'pauseid' => ( is => 'rw' ); has 'distvname' => ( is => 'rw' ); has 'filename' => ( is => 'rw' ); has 'index' => ( is => 'rw', isa => 'CPAN::Mini::Webserver::Index' ); our $VERSION = '0.39'; sub service_name { "$ENV{USER}'s minicpan_webserver"; } sub get_file_from_tarball { my ( $self, $distribution, $filename ) = @_; my $file = file( $self->directory, 'authors', 'id', $distribution->prefix ); die "unknown distribution format $file" unless ( $file =~ /\.(?:tar\.gz|tgz)$/ ); # warn "tar fzxO $file $filename"; #my $contents = `tar fzxO $file $filename`; my $contents; if ( eval { require Archive::Tar; 1 } ) { my $ar = Archive::Tar->new("$file"); $contents = $ar->get_content($filename); } else { # Use the system built-in tar (hopefully) # This one hopefully understands -z # and CPAN filenames contain hopefully no weird characters # warn "tar fzxO $file $filename"; $contents = `tar fzxO $file $filename`; } return $contents; } sub checksum_data_for_author { my ( $self, $pauseid ) = @_; my $file = file( $self->directory, 'authors', 'id', substr( $pauseid, 0, 1 ), substr( $pauseid, 0, 2 ), $pauseid, 'CHECKSUMS', ); return unless -f $file; my ( $content, $cksum ); { local $/; open my $fh, "$file" or die "$file: $!"; $content = <$fh>; close $fh; } eval $content; return $cksum; } # this is a hook that HTTP::Server::Simple calls after setting up the # listening socket. we use it load the indexes sub after_setup_listener { my $self = shift; my %config = CPAN::Mini->read_config; my $directory = dir( glob $config{local} ); $self->directory($directory); my $authors_filename = file( $directory, 'authors', '01mailrc.txt.gz' ); my $packages_filename = file( $directory, 'modules', '02packages.details.txt.gz' ); die "Please set up minicpan" unless defined($directory) && ( -d $directory ) && ( -f $authors_filename ) && ( -f $packages_filename ); my $cache = App::Cache->new( { ttl => 60 * 60 } ); my $parse_cpan_authors = $cache->get_code( 'parse_cpan_authors', sub { Parse::CPAN::Authors->new( $authors_filename->stringify ) } ); my $parse_cpan_packages = $cache->get_code( 'parse_cpan_packages', sub { Parse::CPAN::Packages->new( $packages_filename->stringify ) } ); $self->parse_cpan_authors($parse_cpan_authors); $self->parse_cpan_packages($parse_cpan_packages); my $scratch = dir( $cache->scratch ); $self->scratch($scratch); my $index = CPAN::Mini::Webserver::Index->new; $self->index($index); $index->create_index( $parse_cpan_authors, $parse_cpan_packages ); } sub print_banner { my $self = shift; print( "CPAN:Mini::Webserver is ready for your queries at " . "http://localhost:" . $self->port . "/\n" ); } sub handle_request { my ( $self, $cgi ) = @_; eval { $self->_handle_request($cgi) }; if ($@) { print "HTTP/1.0 500\r\n", $cgi->header, "

Internal Server Error

", $cgi->escapeHTML($@); } } sub _handle_request { my ( $self, $cgi ) = @_; $self->cgi($cgi); $self->hostname( $cgi->virtual_host() ); my $path = $cgi->path_info(); # $raw, $download and $install should become $action? my ( $raw, $install, $download, $pauseid, $distvname, $filename ); if ( $path =~ m{^/~} ) { ( undef, $pauseid, $distvname, $filename ) = split( '/', $path, 4 ); $pauseid =~ s{^~}{}; } elsif ( $path =~ m{^/(raw|download|install)/~} ) { ( undef, undef, $pauseid, $distvname, $filename ) = split( '/', $path, 5 ); ( $1 eq 'raw' ? $raw : $1 eq 'install' ? $install : $download ) = 1; $pauseid =~ s{^~}{}; } $self->pauseid($pauseid); $self->distvname($distvname); $self->filename($filename); #warn "$raw / $download / $pauseid / $distvname / $filename"; if ( $path eq '/' ) { $self->index_page(); } elsif ( $path eq '/search/' ) { $self->search_page(); } elsif ( $raw && $pauseid && $distvname && $filename ) { $self->raw_page(); } elsif ( $install && $pauseid && $distvname && $filename ) { $self->install_page(); } elsif ( $download && $pauseid && $distvname ) { $self->download_file(); } elsif ( $pauseid && $distvname && $filename ) { $self->file_page(); } elsif ( $pauseid && $distvname ) { $self->distribution_page(); } elsif ($pauseid) { $self->author_page(); } elsif ( $path =~ m{^/perldoc} ) { $self->pod_page(); } elsif ( $path =~ m{^/dist/} ) { $self->dist_page(); } elsif ( $path =~ m{^/package/} ) { $self->package_page(); } elsif ( $path eq '/static/css/screen.css' ) { $self->css_screen_page(); } elsif ( $path eq '/static/css/print.css' ) { $self->css_print_page(); } elsif ( $path eq '/static/css/ie.css' ) { $self->css_ie_page(); } elsif ( $path eq '/static/images/logo.png' ) { $self->images_logo_page(); } elsif ( $path eq '/static/images/favicon.png' ) { $self->images_favicon_page(); } elsif ( $path eq '/favicon.ico' ) { $self->images_favicon_page(); } elsif ( $path eq '/static/xml/opensearch.xml' ) { $self->opensearch_page(); } else { my ($q) = $path =~ m'/(.*?)/?$'; $self->not_found_page($q); } } sub not_found_page { my $self = shift; my $q = shift; my ( $authors, $dists, $packages ) = $self->_do_search($q); print "HTTP/1.0 200 OK\r\n"; print $self->cgi->header; print Template::Declare->show( '404', { parse_cpan_authors => $self->parse_cpan_authors, q => $q, authors => $authors, distributions => $dists, packages => $packages } ); } sub redirect { my $self = shift; my $url = shift; print "HTTP/1.0 302 OK\r\n"; print $self->cgi->redirect($url); } sub index_page { my $self = shift; my $cgi = $self->cgi; print "HTTP/1.0 200 OK\r\n"; print $cgi->header; print Template::Declare->show('index'); } sub search_page { my $self = shift; my $cgi = $self->cgi; my $q = $cgi->param('q'); my ( $authors, $dists, $packages ) = $self->_do_search($q); print "HTTP/1.0 200 OK\r\n"; print $cgi->header; print Template::Declare->show( 'search', { parse_cpan_authors => $self->parse_cpan_authors, q => $q, authors => $authors, distributions => $dists, packages => $packages } ); } sub _do_search { my $self = shift; my $q = shift; my $index = $self->index; my @results = $index->search($q); my ( @authors, @distributions, @packages ); if ( $q !~ /(?:::|-)/ ) { @authors = uniq grep { ref($_) eq 'Parse::CPAN::Authors::Author' } @results; } if ( $q !~ /::/ ) { @distributions = uniq grep { ref($_) eq 'Parse::CPAN::Packages::Distribution' } @results; } if ( $q !~ /-/ ) { @packages = uniq grep { ref($_) eq 'Parse::CPAN::Packages::Package' } @results; } @authors = sort { $a->name cmp $b->name } @authors; @distributions = sort { my @acount = $a->dist =~ /-/g; my @bcount = $b->dist =~ /-/g; scalar(@acount) <=> scalar(@bcount) || $a->dist cmp $b->dist } @distributions; @packages = sort { my @acount = $a->package =~ /::/g; my @bcount = $b->package =~ /::/g; scalar(@acount) <=> scalar(@bcount) || $a->package cmp $b->package } @packages; return ( \@authors, \@distributions, \@packages ); } sub author_page { my $self = shift; my $cgi = $self->cgi; my $pauseid = $self->pauseid; my @distributions = sort { $a->distvname cmp $b->distvname } grep { $_->cpanid eq uc $pauseid } $self->parse_cpan_packages->distributions; my $author = $self->parse_cpan_authors->author( uc $pauseid ); my $cksum = $self->checksum_data_for_author( uc $pauseid ); my %dates; if ( not $@ and defined $cksum ) { foreach my $dist (@distributions) { $dates{ $dist->distvname } = $cksum->{ $dist->filename }->{mtime}; } } print "HTTP/1.0 200 OK\r\n"; print $cgi->header; print Template::Declare->show( 'author', { author => $author, pauseid => $pauseid, distributions => \@distributions, dates => \%dates, } ); } sub distribution_page { my $self = shift; my $cgi = $self->cgi; my $pauseid = $self->pauseid; my $distvname = $self->distvname; my ($distribution) = grep { $_->cpanid eq uc $pauseid && $_->distvname eq $distvname } $self->parse_cpan_packages->distributions; my $filename = $distribution->distvname . "/META.yml"; my $metastr = $self->get_file_from_tarball( $distribution, $filename ); my $meta = {}; my @yaml = eval { Parse::CPAN::Meta::Load($metastr); }; if ( not $@ ) { $meta = $yaml[0]; } my $cksum_data = $self->checksum_data_for_author( uc $pauseid ); $meta->{'release date'} = $cksum_data->{ $distribution->filename }->{mtime}; my @filenames = $self->list_files($distribution); print "HTTP/1.0 200 OK\r\n"; print $cgi->header; print Template::Declare->show( 'distribution', { author => $self->parse_cpan_authors->author( uc $pauseid ), distribution => $distribution, pauseid => $pauseid, distvname => $distvname, filenames => \@filenames, meta => $meta, pcp => $self->parse_cpan_packages, } ); } sub pod_page { my $self = shift; my $cgi = $self->cgi; my ($pkgname) = $cgi->keywords; my $m = $self->parse_cpan_packages->package($pkgname); my $d = $m->distribution; my ( $pauseid, $distvname ) = ( $d->cpanid, $d->distvname ); my $url = "/package/$pauseid/$distvname/$pkgname/"; $self->redirect($url); } sub install_page { my $self = shift; my $cgi = $self->cgi; my $pauseid = $self->pauseid; my $distvname = $self->distvname; my ($distribution) = grep { $_->cpanid eq uc $pauseid && $_->distvname eq $distvname } $self->parse_cpan_packages->distributions; my $file = file( $self->directory, 'authors', 'id', $distribution->prefix ); print "HTTP/1.0 200 OK\r\n"; print $cgi->header; printf '

Installing %s

',
        $distribution->distvname;

    warn sprintf "Installing '%s'\n", $distribution->prefix;

    require CPAN;    # loads CPAN::Shell
    CPAN::Shell->install( $distribution->prefix );

    printf '
Go back', $self->pauseid, $self->distvname; } sub file_page { my $self = shift; my $cgi = $self->cgi; my $pauseid = $self->pauseid; my $distvname = $self->distvname; my $filename = $self->filename; my ($distribution) = grep { $_->cpanid eq uc $pauseid && $_->distvname eq $distvname } $self->parse_cpan_packages->distributions; my $contents = $self->get_file_from_tarball( $distribution, $filename ); my $parser = Pod::Simple::HTML->new; my $port = $self->port; my $host = $self->hostname; $parser->perldoc_url_prefix("http://$host:$port/perldoc?"); $parser->index(0); $parser->no_whining(1); $parser->no_errata_section(1); $parser->output_string( \my $html ); $parser->parse_string_document($contents); $html =~ s/^.*//s; $html =~ s/.*$//s; # $html # =~ s/^(.*%3A%3A.*)$/my $x = $1; ($x =~ m{indexItem}) ? 1 : $x =~ s{%3A%3A}{\/}g; $x/gme; print "HTTP/1.0 200 OK\r\n"; print $cgi->header; print Template::Declare->show( 'file', { author => $self->parse_cpan_authors->author( uc $pauseid ), distribution => $distribution, pauseid => $pauseid, distvname => $distvname, filename => $filename, contents => $contents, html => $html, } ); } sub download_file { my $self = shift; my $cgi = $self->cgi; my $pauseid = $self->pauseid; my $distvname = $self->distvname; my $filename = $self->filename; my ($distribution) = grep { $_->cpanid eq uc $pauseid && $_->distvname eq $distvname } $self->parse_cpan_packages->distributions; my $file = file( $self->directory, 'authors', 'id', $distribution->prefix ); if ($filename) { my $contents = $self->get_file_from_tarball( $distribution, $filename ); print "HTTP/1.0 200 OK\r\n"; print $cgi->header( -content_type => 'text/plain', -content_length => length $contents, ); print $contents; } else { open my $fh, $file or return $self->not_found_page( $self->filename ); print "HTTP/1.0 200 OK\r\n"; my $content_type = $file =~ /zip/ ? 'application/zip' : 'application/x-gzip'; print $cgi->header( -content_type => $content_type, -content_disposition => "attachment; filename=" . $file->basename, -content_length => -s $fh, ); while (<$fh>) { print; } } } sub raw_page { my $self = shift; my $cgi = $self->cgi; my $pauseid = $self->pauseid; my $distvname = $self->distvname; my $filename = $self->filename; my ($distribution) = grep { $_->cpanid eq uc $pauseid && $_->distvname eq $distvname } $self->parse_cpan_packages->distributions; my $file = file( $self->directory, 'authors', 'id', $distribution->prefix ); my $contents = $self->get_file_from_tarball( $distribution, $filename ); my $html; if ( $filename =~ /\.(pm|pl|PL|t)$/ ) { my $document = PPI::Document->new( \$contents ); my $highlight = PPI::HTML->new( line_numbers => 0 ); my $pretty = $highlight->html($document); my $split = ''; # turn significant whitespace into   my @lines = map { $_ =~ s{( +)}{"" . (" " x length($1))}e; "$split$_"; } split /$split/, $pretty; # remove the extra line number tag @lines = map { s{}{}; $_ } @lines; # remove newlines $_ =~ s{
}{}g foreach @lines; # link module names to search.cpan.org my $port = $self->port; my $host = $self->hostname; @lines = map { $_ =~ s{([^<]+?::[^<]+?)}{$1}; $_; } @lines; $html = join '', @lines; } print "HTTP/1.0 200 OK\r\n"; print $cgi->header; print Template::Declare->show( 'raw', { author => $self->parse_cpan_authors->author( uc $pauseid ), distribution => $distribution, filename => $filename, pauseid => $pauseid, distvname => $distvname, contents => $contents, html => $html, } ); } sub dist_page { my $self = shift; my ($dist) = $self->cgi->path_info =~ m{^/dist/(.+?)$}; my $latest = $self->parse_cpan_packages->latest_distribution($dist); if ($latest) { $self->redirect( "/~" . $latest->cpanid . "/" . $latest->distvname ); } else { $self->not_found_page($dist); } } sub package_page { my $self = shift; my $cgi = $self->cgi; my $path = $cgi->path_info(); my ( $pauseid, $distvname, $package ) = $path =~ m{^/package/(.+?)/(.+?)/(.+?)/$}; my ($p) = grep { $_->package eq $package && $_->distribution->distvname eq $distvname && $_->distribution->cpanid eq uc($pauseid) } $self->parse_cpan_packages->packages; my $distribution = $p->distribution; my @filenames = $self->list_files($distribution); my $postfix = $package; $postfix =~ s{^.+::}{}g; $postfix .= '.pm'; my ($filename) = grep { $_ =~ /$postfix$/ } sort { length($a) <=> length($b) } @filenames; my $port = $self->port; my $host = $self->hostname; my $url = "http://$host:$port/~$pauseid/$distvname/$filename"; $self->redirect($url); } sub list_files { my ( $self, $distribution ) = @_; my $file = file( $self->directory, 'authors', 'id', $distribution->prefix ); my @filenames; if ( $file =~ /\.(?:tar\.gz|tgz)$/ ) { # warn "tar fzt $file"; if ( eval { require Archive::Tar; 1 } ) { my $ar = Archive::Tar->new("$file"); @filenames = sort $ar->list_files(); } else { @filenames = sort `tar fzt $file`; chomp @filenames; } @filenames = grep { $_ !~ m{/$} } @filenames; } else { die "Unknown distribution format $file"; } } sub css_screen_page { my $self = shift; my $cgi = $self->cgi; print "HTTP/1.0 200 OK\r\n"; print $cgi->header( -type => 'text/css', -expires => '+1d' ); print Template::Declare->show('css_screen'); } sub css_print_page { my $self = shift; my $cgi = $self->cgi; print "HTTP/1.0 200 OK\r\n"; print $cgi->header( -type => 'text/css', -expires => '+1d' ); print Template::Declare->show('css_print'); } sub css_ie_page { my $self = shift; my $cgi = $self->cgi; print "HTTP/1.0 200 OK\r\n"; print $cgi->header( -type => 'text/css', -expires => '+1d' ); print Template::Declare->show('css_ie'); } sub images_logo_page { my $self = shift; my $cgi = $self->cgi; print "HTTP/1.0 200 OK\r\n"; print $cgi->header( -type => 'image/png', -expires => '+1d' ); print Template::Declare->show('images_logo'); } sub images_favicon_page { my $self = shift; my $cgi = $self->cgi; print "HTTP/1.0 200 OK\r\n"; print $cgi->header( -type => 'image/png', -expires => '+1d' ); print Template::Declare->show('images_favicon'); } sub opensearch_page { my $self = shift; my $cgi = $self->cgi; print "HTTP/1.0 200 OK\r\n"; print $cgi->header( -type => 'application/opensearchdescription+xml', -expires => '+1d' ); print Template::Declare->show('opensearch'); } 1; __END__ =head1 NAME CPAN::Mini::Webserver - Search and browse Mini CPAN =head1 SYNOPSIS % minicpan_webserver =head1 DESCRIPTION This module is the driver that provides a web server that allows you to search and browse Mini CPAN. First you must install CPAN::Mini and create a local copy of CPAN using minicpan. Then you may run minicpan_webserver and search and browse Mini CPAN at http://localhost:2963/. You may access the Subversion repository at: http://code.google.com/p/cpan-mini-webserver/ And may join the mailing list at: http://groups.google.com/group/cpan-mini-webserver =head1 AUTHOR Leon Brocard =head1 COPYRIGHT Copyright (C) 2008, Leon Brocard. This module is free software; you can redistribute it or modify it under the same terms as Perl itself.