# -*-perl-*- use Config; &read_makefile; $fullperl = resolve_make_var('FULLPERL') || $Config{'perlpath'}; $islib = resolve_make_var('INSTALLSITELIB'); $name = $0; $name =~ s~^.*/~~; $name =~ s~.PL$~~; open(OUT,"> $name") || die "Could open $name for writing: $!\n"; print "writing $name\n"; while () { if (m~^\#!/.*/perl.*$~o) { # This substitutes the path perl was installed at on this system # _and_ removed any (-w) options. print OUT "#!",$fullperl,$1,"\n"; next; } if (/^use lib/o) { # This substitutes the actuall library install path print OUT "use lib '$islib';\n"; next; } print OUT; } close(OUT); # Make it executable too, and writeable chmod 0755, $name; #### The library sub resolve_make_var ($) { my($var) = shift @_; my($val) = $make{$var}; # print "Resolving: ",$var,"=",$val,"\n"; while ($val =~ s~\$\((\S+)\)~$make{$1}~g) {} # print "Resolved: $var: $make{$var} -> $val\n"; $val; } sub read_makefile { open(MAKEFILE, 'Makefile') || die "Could not open Makefile for reading: $!\n"; while () { chomp; next unless m/^([A-Z]+)\s*=\s*(\S+)$/; $make{$1}=$2; # print "Makevar: $1 = $2\n"; } close(MAKEFILE) } __END__ #!/usr/bin/perl # Perl 5.002 or later. w3mir is mostly tested with perl 5.004 # use lib '/hom/janl/lib/perl'; # # Can perform the following fixes: # - Rewrite redirected to URLs # - change .../ into .../index.html (or .../Welcome.html) # - change external links to point to some helpfull .html file ---- NOT IMPLEMENTED # - change links to documents not retrived to point to some helpfull # .html file --- NOT IMPLEMENTED # - After adding a 'Also' directive we can edit the urls pointing to within # the new space of the retrival scope, making the pointers consistent. # Method: # 1 Gather list of all URLs to be rewritten: # - Redirects: Just read the .redirs file # - .../ into .../index.html: All can be found in the .referers file # Remember to rewrite missing / redirects too... # - external links: in the .referers file too # - non-retrived documents: What files should be here (according to # .referers file) but are not? # 2 Gather list of all documents needing editing # 3 Edit them # # 09/05/98 janl - Only rewrite .../ to .../$indexname if the attribute # does not refer to a directory. -> 0.6.2 # 12/05/98 janl - Use ->local_path to determine filename -> 0.6.3 # 22/05/98 janl - .../#foo was not being rewritten to # .../$indexname#foo -> 0.6.4 # require 5.002; use vars qw($win32); # To figure out what kind of system this is BEGIN { use Config; $win32 = ( $Config{'osname'} eq 'MSWin32' ); } use Carp; use htmlop; use URI::URL; use URI::Escape; eval ' # This is for URI 1.0 only use URI; # Make URI behave in a (more) sensible manner $URI::ABS_ALLOW_RELATIVE_SCHEME=1; $URI::ABS_REMOTE_LEADING_DOTS=1; '; use strict; my $VERSION; $VERSION='0.6.6'; my $debug=0; # Not debugging my $verbose=0; my $indexname='index.html'; my $chdirto=''; # Place to chdir to after reading # config file my $infoloss=0; # 1 if any URL translations (which # cause information loss) are in # effect. If this is true we use the # SAVEURL operation. What to get, and # not. Text of user supplied # fetch/ignore rules my $doindex=1; # append $indexname to /$ ? my $editthis=''; # Edit references matching this expression. my $files=0; # How many files have I edited? my $rc=''; my $rule_text="# User defined fetch/ignore rules\n"; # Code ref to the rule procedure my $rule_code; # Code to prefix and postfix the generated code. Prefix should make # $_ contain the url to match. Postfix should return 1, the default # is to get the url/file. my $rule_prefix='$rule_code = sub { local($_) = shift;'."\n"; my $rule_postfix=' return 1; } '; # Scope tests generated by URL/Also directives in cfg. The scope code # is just like the rule code, but used for program generated # fetch/ignore rules related to multiscope retrival. my $scope_fetch="# Automatic fetch rules for multiscope retrival\n"; my $scope_ignore="# Automatic ignore rules for multiscope retrival\n"; my $scope_code; my $scope_prefix='$scope_code = sub { local($_) = shift;'."\n"; my $scope_postfix=' return 0; } '; # Function to apply to urls, se rule comments. my $user_apply_code; # User specified apply code my $apply_code; # w3mirs apply code my $apply_prefix='$apply_code = sub { local($_) = @_;'."\n"; my $apply_lc=' $_ = lc $_; '; my $apply_postfix=' return $_; } '; my @user_apply; # List of users apply rules. my @internal_apply; # List of w3mirs apply rules. my $iinline=''; # inline RE code to make RE caseinsensitive my $ipost=''; # RE postfix to make it caseinsensitive my $lc=0; # Convert urls/filenames to lowercase? my $abs=0; # Absolutify URLs? my $fixrc=''; # Name of w3mfix config file my $fixup=0; # Do things needed to run fixup my $r=0; # Recurse? no recursion = absolutify links my %rum_referers=(); # Array of referers, key: rum_url my %rum_redirected=(); # Array of redirected url: key: original url my %lf_edited=(); # Edited this file yet? my $list; # List url on STDOUT? my %stat=(); # stat($lf_url): 'd' for dir, 'f' for others # ######################### Libwww-perl addons: # URI as part of libwww: sub URI::URL::_generic::unix_path { my $self = (shift)->clone; $self->frag(undef); $self->scheme("file"); $self->unix_path; } sub URI::URL::_generic::basename { my $self = shift; my @p = $self->path_components; my $old = $p[-1]; if (@_) { splice(@p, -1, 1, shift); $self->path_components(@p) } $old; } # URI 1.0 sub URI::URL::unix_path { my $self = (shift)->clone; return uri_unescape($self->path); } sub URI::_generic::basename { my $self = shift; my @p = $self->path_segments; my $old = $p[-1]; if (@_) { splice(@p, -1, 1, shift); $self->path_segments(@p) } $old; } # ######################### Configuration/argument parsing sub parse_args { my $f; my $i; $i=0; while ($f=shift) { $i++; # This is a demonstration against Getopts::Long. if ($f =~ s/^-+//) { $verbose=-1,next if $f eq 'q'; # Quiet $verbose=1,next if $f eq 'c'; # Chatty die "w3mfix version $VERSION\n" if $f eq 'v'; # Version die "rtfm\n" if ($f eq 'help' || $f eq 'h' || $f eq '?'); if ($f eq 'editref') { die "Sorry, can only have one -editref pr. run\n" if $editthis; $editthis=quotemeta(shift); next; } if ($f eq 'd') { # Debugging level $f=shift; unless (($debug = $f) > 0) { die "w3mfix: debug level must be a number greater than zero.\n"; } next; } # Those were all the options... warn "w3mfix: Unknown option: -$f. Use -h for usage info.\n"; exit(1); } else { # If we get this far then ... it's a configuration file name: $rc = $f; die "w3mfix: Got a non-option argument that wasn't the name of a\n". "(configuration) file either\n" unless -f $f; } } } sub parse_cfg_file { # Read the configuration file. Aborts on errors. # Ignores w3mir options w3mfix does not need itself. my ( $file ) = @_ ; my ($key, $value, $authserver,$authrealm,$authuser,$authpasswd); my $i; die "w3mfix: config file $file is not a file.\n" unless -f $file; open(CFGF, $file) || die "Could not open config file $file: $!\n"; # print STDERR "Reading $file\n"; $i=0; while () { # Trim off various junk chomp; s/^#.*//; s/^\s+|\s$//g; # Anything left? next if $_ eq ''; # Examine remains $i++; ($key, $value) = split(/\s*:\s*/,$_,2); $key = lc $key; # These are no-ops in w3mfix next if ( $key eq 'initial-referer' ); next if ( $key eq 'header' ); next if ( $key eq 'pause' ); next if ( $key eq 'retry-pause' ); next if ( $key eq 'retries' ); next if ( $key eq 'robot-rules' ); next if ( $key eq 'remove-nomirror' ); next if ( $key eq 'file-disposition' ); next if ( $key eq 'http-proxy' ); next if ( $key eq 'proxy-options' ); next if ( $key eq 'auth-domain' ); next if ( $key eq 'auth-user' ); next if ( $key eq 'auth-passwd' ); next if ( $key eq 'disable-headers' ); next if ( $key eq 'agent' ); $debug=numeric($value),next if ( $key eq 'debug' ); umask(numeric($value)),next if ( $key eq 'umask' ); $indexname=$value,next if ($key eq 'index-name'); $verbose=nway($value,'quiet','brief','chatty')-1,next if ( $key eq 'verbosity' ); if ( $key eq 'cd' ) { $chdirto=$value; next; } if ($key eq 'url') { my ($rum_url_o,$lf_dir,$rum_reurl,$rum_rebase); # A two argument URL: line? if ($value =~ m/^(.+)\s+(.+)/i) { # Two arguments. $rum_url_o=url $1; # The last is a directory, it must end in / $lf_dir=$2; $lf_dir.='/' unless $lf_dir =~ m~/$~; # The first is a URL, make it more canonical, find the base. # The namespace confusion in this section is correct.(??) $rum_rebase = quotemeta( ($rum_url_o->as_string =~ m~^(.*/)~)[0] ); # print "URL: ",$rum_url_o->as_string,"\n"; # print "Base: $rum_rebase\n"; # Translate from rum space to lf space: push(@internal_apply,"s/^".$rum_rebase."/".quotemeta($lf_dir)."/"); # That translation could lead to information loss. $infoloss=1; # Fetch rules tests the rum_url_o->as_string. Fetch whatever # matches the base. $scope_fetch.="return 1 if m/^".$rum_rebase."/".$ipost.";\n"; # Ignore whatever did not match the base. $scope_ignore.="return 0 if m/^". quotemeta("http://".$rum_url_o->netloc."/")."/".$ipost.";\n"; } else { # $rum_url_o=root_quene($value); $rum_url_o=url $value; $rum_rebase = quotemeta( ($rum_url_o->as_string =~ m~^(.*/)~)[0] ); # Translate from rum space to lf space: push(@internal_apply,"s/^".$rum_rebase."//"); $scope_fetch.="return 1 if m/^".$rum_rebase."/".$ipost.";\n"; $scope_ignore.="return 0 if m/^". quotemeta("http://".$rum_url_o->netloc."/")."/".$ipost.";\n"; } next; } if ($key eq 'also' || $key eq 'also-quene') { if ($value =~ m/^(.+)\s+(.+)/i) { my ($rum_url_o,$lf_dir,$rum_reurl,$rum_rebase); # Two arguments. $rum_url_o=url $1; $rum_url_o->host(lc $rum_url_o->host); # The last is a directory, it must end in / $lf_dir=$2; $lf_dir.='/' unless $lf_dir =~ m~/$~; # The first is a URL, find the base $rum_rebase = quotemeta( ($rum_url_o->as_string =~ m~^(.*/)~)[0] ); # Ok, now we can transform and select stuff the right way push(@internal_apply,"s/^".$rum_rebase."/".quotemeta($lf_dir)."/"); $infoloss=1; # Fetch rules tests the rum_url_o->as_string. Fetch whatever # matches the base. $scope_fetch.="return 1 if m/^".$rum_rebase."/".$ipost.";\n"; # Ignore whatever did not match the base. This cures problem # with '..' from base in in rum space pointing within the the # scope in ra space. We introduced a extra level (or more) of # directories with the apply above. Must do same with 'Also:' # directives. $scope_ignore.="return 0 if m/^". quotemeta("http://".$rum_url_o->netloc."/")."/".$ipost.";\n"; } else { die "Also: requires 2 arguments\n"; } next; } if ($key eq 'quene') { # root_quene($value); next; } if ($key eq 'ignore-re' || $key eq 'fetch-re') { # Check that it's a re, better that I am strict than for perl to # make compilation errors. unless ($value =~ /^m(.).*\1[gimosx]*$/) { print STDERR "w3mfix: $value is not a recognized regular expression\n"; exit 1; } } if ($key eq 'fetch' || $key eq 'fetch-re') { my $expr=$value; $expr = wild_re($expr).$ipost if ($key eq 'fetch'); $rule_text.=' return 1 if '.$expr.";\n"; next; } if ($key eq 'ignore' || $key eq 'ignore-re') { my $expr=$value; $expr = wild_re($expr).$ipost if ($key eq 'ignore'); $rule_text.=' return 0 if '.$expr.";\n"; next; } if ($key eq 'apply') { unless ($value =~ /^s(.).*\1.*\1[gimosxe]*$/) { print STDERR "w3mfix: '$value' is not a recognized regular expression\n"; exit 1; } push(@user_apply,$value) ; $infoloss=1; next; } if ($key eq 'options') { my($val,$nval); foreach $val (split(/\s*,\s*/,lc $value)) { if ($i==1) { $nval=nway($val,'recurse','no-date-check','only-nonexistent', 'list-urls','lowercase','remove','batch','read-urls', 'abs'); $r=1,next if $nval==0; next if $nval==1; next if $nval==2; $list=1,next if $nval==3; if ($nval==4) { $lc=1; $iinline=($lc?"(?i)":""); $ipost=($lc?"i":""); next ; } next if $nval==5; next if $nval==6; next if $nval==7; $abs=1,next if $nval==8; } else { die "w3mfix: options must be the first directive in the config file.\n"; } } } if ($key eq 'fixup') { # chomp($fixrc=`pwd` || '.'); # $fixrc.="/$file"; # warn "Fixrc: $fixrc\n"; # $fixup=1; my($val,$nval); foreach $val (split(/\s*,\s*/,lc $value)) { $nval=nway($val,'on','run','noindex'); next if $nval==0; next if $nval==1; $doindex=0 if $nval==2; # Ignore everyting else } } } close(CFGF); } sub wild_re { # Here we translate unix wildcard subset to to perlre local($_) = shift; s#\*#\.\*#; s#\?#\.#; s#([\/\(\)\\\|\{\}\+)\$\^])#\\$1#g; return $_ = '/'.$_.'/'; } sub numeric { # Check if argument is numeric? my ( $number ) = @_ ; return oct($number) if ($number =~ /\d+/ || $number =~ /\d+.\d+/); die "Expected a number, got \"$number\"\n"; } sub boolean { my ( $boolean ) = @_ ; $boolean = lc $boolean; return 0 if ($boolean eq 'false' || $boolean eq 'off' || $boolean eq '0'); return 1 if ($boolean eq 'true' || $boolean eq 'on' || $boolean eq '1'); die "Expected a boolean, got \"$boolean\"\n"; } sub nway { my ( $value ) = shift; my ( @values ) = @_; my ( $val ) = 0; $value = lc $value; while (@_) { return $val if $value eq shift; $val++; } die "Expected one of ".join(", ",@values).", got \"$value\"\n"; } sub stat { my $file = shift; if (exists($stat{$file})) { print STDERR "++Cache hit: $file\n" if $debug; } else { stat($file); if (-e _) { $stat{$file}=(-d _)?'d':'f'; } else { $stat{$file}='n'; } print STDERR "--Cache miss: $file\n" if $debug; } return $stat{$file}; } # ######################## Read 'state' files ############################## sub read_state { my $reffile='.referers'; my $refered; my @referers; $reffile="referers" if $win32; print STDERR "reading $reffile\n" if $verbose>0; open(REFERERS,"< $reffile") || die "Could not open $reffile for reading: $!\n"; while () { chomp; ($refered,undef,@referers) = split(/\s+/); $rum_referers{$refered}= [ @referers ]; # print STDERR $refered," <- ",join(' and ',@referers),"\n"; } close(REFERERS); # Read redirection report my $redirfile='.redirs'; my $wrong; my $right; my $tmp; $redirfile="redirs" if $win32; print STDERR "reading $redirfile\n" if $verbose>0; open(REDIRS,"< $redirfile") || die "Could not open $redirfile for reading: $!\n"; while () { chomp; ($wrong,undef,$right) = split(/\s+/); $rum_redirected{$wrong}=$right; } close(REDIRS); } # ######################### Process every single tag ######################## sub process_tag { # Process a tag in html file my $lf_referer = shift; my $base_url = shift; my $tag_name = shift; my $url_attrs = shift; # Retrun quickly if no URL attributes return unless defined($url_attrs); my $attrs = shift; # Information loss through apply or processing in this procedure? my $il = $infoloss; my $redirs; my $stat; my $rum_url; # The absolute URL my $lf_url; # The local filesystem url my $lf_url_o; # ... and it's object my $key; my $orig_rum_url; # my $debug = 1; # print STDERR "\nProcess Tag: $tag_name, URL attributes: ", join(', ',@{$url_attrs}),"\nOrigin:",$base_url,"\n"; # if $debug>2; substr($lf_referer,0,0)='./' unless substr($lf_referer,0,1) eq '/'; foreach $key (@{$url_attrs}) { if (defined($$attrs{$key})) { $orig_rum_url=$rum_url=$$attrs{$key}; print STDERR "\n$key = $rum_url\n" if $debug; # Apply redirects: $redirs=0; if (exists($rum_redirected{$rum_url})) { $il=1; while (exists($rum_redirected{$rum_url})) { die "Too many redirects in a row\n" if $redirs++>32; print STDERR "$rum_url -> ".$rum_redirected{$rum_url}."\n" if $debug; $rum_url=$rum_redirected{$rum_url}; } } # Apply program/user apply rules $lf_url=apply($rum_url); if (defined($lf_url)) { # Apply directory/file check here $stat=&stat($lf_url); if ($stat eq 'f' && $lf_url =~ m~/$~) { # It's a file, remove trailing / print STDERR "****** File / fixup of $lf_url\n" if $debug; $il=1; substr($lf_url,length($lf_url)) = ''; } elsif ($stat eq 'd' && !($lf_url =~ m~/$~) ) { # It's a directory, add a trailing / print STDERR "****** Directory / fixup of $lf_url\n" if $debug; $il=1; $lf_url .= '/'; } substr($lf_url,0,0)='./' unless substr($lf_url,0,1) eq '/'; $lf_url_o=url $lf_url; my $tmp=$lf_url_o->clone; $tmp->basename($indexname); if ( $doindex && $lf_url_o->basename eq "" && &stat($tmp->unix_path) eq 'f' && !$htmlop::isdir{$key}) { $lf_url_o=$tmp; $il=1; print STDERR "indexname adjusted to ",$lf_url_o->as_string,"\n" if $debug; } # Save new value in the hash, make it a file url to get 'rel' working $lf_url_o->scheme('file'); $$attrs{$key} = ($lf_url_o->rel("file:".$lf_referer))->as_string; printf STDERR "Saved ".$$attrs{$key}."\n" if $debug; # If there is potential information loss save the old value too $$attrs{"W3MIR".$key}=$orig_rum_url if $il; } elsif ($redirs>0) { $$attrs{$key}=$rum_url; print STDERR "Saved ".$$attrs{$key}."\n" if $debug; $$attrs{"W3MIR".$key}=$orig_rum_url; } } } } # ###################### Edit the URLs in one file... ######################## sub edit_html_file { # Check if it's a html file. I know this tag is in all html # files, because w3mir put it there. my($lf_url)=shift; my($rum_url)=shift; # Figure out the filename for our local filesystem. $lf_url.=$indexname if $lf_url =~ m~/$~ || $lf_url eq ''; # Stuff in need of unquoting? $lf_url = (url "file:$lf_url")->local_path if $lf_url =~ /\%\d\d/; if (exists($lf_edited{$lf_url})) { print STDERR "Already edited $lf_url\n" if $debug; return ; } $lf_edited{$lf_url}=1; my $page; my $newpage; my $read; my $atime; my $mtime; # dev uno mode nlink uid gid rdev size atime mtime (undef,undef,undef,undef,undef,undef,undef,undef,$atime,$mtime) = stat($lf_url); if (!open(TMPF,"< $lf_url\n")) { warn "Cannot read $lf_url: $!\n" if $verbose>=0; return; } $read=sysread(TMPF,$page,10240,0); close(TMPF); if (! $page =~ /0; return ; } $files++; print STDERR "w3mfix: $lf_url" if $verbose>=0; print STDERR "$lf_url is a html file\n" if $debug; print STDERR " reading" if $verbose>0; open(TMPF,$lf_url) || die "Could not open $lf_url for reading: $!\n"; # read the whole file. { local($/)=undef; $page = ; } close(TMPF); print STDERR " ",length($page)," bytes" if $verbose>0; # It's a html document print STDERR ", editing" if $verbose>0; ($newpage,undef) = &htmlop::process($page, # $htmlop::NODOC, $htmlop::ABS,$rum_url, $htmlop::USESAVED,'W3MIR', $htmlop::TAGCALLBACK,\&process_tag,$lf_url); open(TMPF,">$lf_url") || die "\nCould not open $lf_url for writing: $!\n"; print STDERR ", saving" if $verbose>0; if (length($newpage)) { # This is ODD: close does not seem to flush the buffers. So we # force the issue. local($|)=1; print TMPF $newpage || die "\nCould not write to $lf_url (disk full?): $!\n"; } close(TMPF) || die "\nCould not close $lf_url after writing: $!\n"; # Set times back to what they were. utime $atime,$mtime,$lf_url; print STDERR ".\n" if $verbose>=0; } # ############################### Scope test sub want_this { # Find out if we want the url passed. Just pass it on to the # generated functions. my($rum_url)=shift; # Does scope rule want this? return &$scope_code($rum_url) && # Does user rule want this too? &$rule_code($rum_url) } # ############################### Apply the apply rules sub user_apply { # Apply the user apply rules return &$user_apply_code(shift); } sub internal_apply { # Apply the w3mir generated apply rules return &$apply_code(shift); } sub apply { # Apply the user apply rules. Then if URL is wanted return result of # w3mir apply rules. Return the undefined value otherwise. my $url = user_apply(shift); return undef unless want_this($url); internal_apply($url); } # ############################### Decide what URLs to edit sub edit_as_needed { my $rum_redirected; my $rum_url; my $o_rum_url; my $rum_referer; my $lf_url; my $foo; my $redirs; if ($editthis) { # Find the URLs that match $edithis foreach $o_rum_url (keys %rum_referers) { # Work on them if they (now) fall within the scope of retrival $redirs=0; $rum_url=$o_rum_url; if (exists($rum_redirected{$o_rum_url})) { while (exists($rum_redirected{$rum_url})) { die "Too many redirects in a row\n" if $redirs++>32; print STDERR "$rum_url -> ".$rum_redirected{$rum_url}."\n" if $debug; $rum_url=$rum_redirected{$rum_url}; } } next unless $rum_url =~ /$editthis/io; next unless want_this($rum_url); # Find and edit the documents containing references to $o_rum_url foreach $rum_referer (@{$rum_referers{$o_rum_url}}) { next if $rum_referer eq '(commandline)'; $lf_url=apply($rum_referer); next unless defined($lf_url); edit_html_file($lf_url,$rum_referer); } } # Don't do anything else when invoked thus return; } if ($doindex) { # Edit everything that refers anything with trailing / foreach $rum_url (grep(/\/$/,keys %rum_referers)) { foreach $rum_referer (@{$rum_referers{$rum_url}}) { next if $rum_referer eq '(commandline)'; $lf_url=apply($rum_referer); next unless defined($lf_url); edit_html_file($lf_url,$rum_referer); } } } # Edit only redirected stuff foreach $rum_redirected (keys %rum_redirected) { # print "Redirected $rum_redirected\n"; foreach $rum_url (@{$rum_referers{$rum_redirected}}) { # print "- Found in $rum_url\n"; $lf_url=apply($rum_url); next unless defined($lf_url); edit_html_file($lf_url,$rum_url); } } } # ############################### 'main' &parse_args(@ARGV); if (!$rc) { $rc='.w3mirc'; $rc='w3mir.ini' if $win32; } print STDERR "w3mfix: rc file: $rc\n" if $verbose>0; &parse_cfg_file($rc); # $chdirto is ignored, w3mir already did it # Compile second order code # - The rum scope tests my $full_rules=$scope_prefix.$scope_fetch.$scope_ignore.$scope_postfix; eval $full_rules; # warn "Scope rules:\n-------------\n$full_rules\n---------------\n"; die "Program generated rules did not compile. The code is:\n----\n". $full_rules."\n----\n" if !defined($scope_code); $full_rules=$rule_prefix.$rule_text.$rule_postfix; eval $full_rules; # warn "User rules:\n-------------\n$full_rules\n---------------\n"; # - The user specified rum tests die "Ignore/Fetch rules did not compile. The code is:\n----\n". $full_rules."\n----\n" if !defined($rule_code); # - The user specified apply rules my $full_apply=$apply_prefix.($lc?$apply_lc:''). join($ipost.";\n",@user_apply).(($#user_apply>=0)?$ipost:"").";\n". $apply_postfix; eval $full_apply; die "User apply rules did not compile. The code is: ---- ".$full_apply." ----\n" if !defined($apply_code); $user_apply_code=$apply_code; # - The w3mir generated apply rules $full_apply=$apply_prefix.($lc?$apply_lc:''). join($ipost.";\n",@internal_apply).(($#internal_apply>=0)?$ipost:"").";\n". $apply_postfix; eval $full_apply; die "Internal apply rules did not compile. The code is: ---- ".$full_apply." ----\n" if !defined($apply_code); &read_state; &edit_as_needed; exit 0; __END__ # -*- perl -*- There must be a blank line here: =head1 NAME w3mfix - fixup program for w3mir =head1 SYNOPSIS B [B] [B] =head1 DESCRIPTION B is the companion program to L. It can be used for several URL editing operations usefull in different situations. When starting B will read it's configuration file. It's name is either .w3mirc (w3mir.ini on win32) or specified on the commandline. B is controlled by the 'Fixup' directive of the configuration file (described in the L documentation). B is also affected by 'Index-name' and the one special commandline option it knows, as well as the directives/options controlling verbosity and debugging information. =head1 DESCRIPTION B can rewrite URLs in these ways: =over 4 =item * Rewrite URLs that resutled in redirects to point to the place redirected to. This is needed in all cases and will always be fixed by B. =item * Change URLs ending in .../ into .../index.html (or .../Welcome.html). This is, probably, not needed when the mirror is meant do be used with a web-server. It is usefull for browsing directly from disk or CDROM, but in this case it's, most often, required. To disable this specify the I option with the 'Fixup' directive. The default is to transform URLs ending in .../ into .../index.html. To controll the name of the index file use the I directive as documented in L =item * Change URL links to documents outside the mirror to point to some local document. Could be usefull if the mirror is destined for a CDROM to be used on a unconnected machine. THIS IS NOT YET IMPLEMENTED =item * Change URL links to documents that L was unable/forbidden to retrive to point to some local document. Pointing these to a nice informative document is probably better than random error messages from the browser. THIS IS NOT YET IMPLEMENTED =item * And, least, but far from last, B can be used to prepare an established mirror for enlargement. This feature is used thus: Add the new site or subsite to be mirrored on the configuration file (by adding B and B directives). Then run B with the B<-editref> option. When the B<-editref> option is specified B will not perform any other editing tasks. E.g.; To add I to your mirror add something like Also: http://www.yahoo.com/Science/Artificial_Life/ yahoo to the configuration file, then run w3mfix: w3mfix -editref www.yahoo.com/Science/Artificial_Life This will cause all references to I (and under) to be edited so they point to within the mirror. After B has finished you can run L in the normal manner. =back =head1 BUGS Naah. =head1 SEE ALSO L =head1 AUTHORS Bs authors can be reached at I. Bs home page is at http://www.math.uio.no/~janl/w3mir/ __END__ # -*- perl -*- There must be a blank line here: =head1 NAME w3mfix - fixup program for w3mir =head1 SYNOPSIS B [B] [B] =head1 DESCRIPTION B is the companion program to L. It can be used for several URL editing operations usefull in different situations. When starting B will read it's configuration file. It's name is either .w3mirc (w3mir.ini on win32) or specified on the commandline. B is controlled by the 'Fixup' directive of the configuration file (described in the L documentation). B is also affected by 'Index-name' and the one special commandline option it knows, as well as the directives/options controlling verbosity and debugging information. =head1 DESCRIPTION B can rewrite URLs in these ways: =over 4 =item * Rewrite URLs that resutled in redirects to point to the place redirected to. This is needed in all cases and will always be fixed by B. =item * Change URLs ending in .../ into .../index.html (or .../Welcome.html). This is, probably, not needed when the mirror is meant do be used with a web-server. It is usefull for browsing directly from disk or CDROM, but in this case it's, most often, required. To disable this specify the I option with the 'Fixup' directive. The default is to transform URLs ending in .../ into .../index.html. To controll the name of the index file use the I directive as documented in L =item * Change URL links to documents outside the mirror to point to some local document. Could be usefull if the mirror is destined for a CDROM to be used on a unconnected machine. THIS IS NOT YET IMPLEMENTED =item * Change URL links to documents that L was unable/forbidden to retrive to point to some local document. Pointing these to a nice informative document is probably better than random error messages from the browser. THIS IS NOT YET IMPLEMENTED =item * And, least, but far from last, B can be used to prepare an established mirror for enlargement. This feature is used thus: Add the new site or subsite to be mirrored on the configuration file (by adding B and B directives). Then run B with the B<-editref> option. When the B<-editref> option is specified B will not perform any other editing tasks. E.g.; To add I to your mirror add something like Also: http://www.yahoo.com/Science/Artificial_Life/ yahoo to the configuration file, then run w3mfix: w3mfix -editref www.yahoo.com/Science/Artificial_Life This will cause all references to I (and under) to be edited so they point to within the mirror. After B has finished you can run L in the normal manner. =back =head1 BUGS Naah. =head1 SEE ALSO L =head1 AUTHORS Bs authors can be reached at I. Bs home page is at http://www.math.uio.no/~janl/w3mir/