# Copyrights 2003-2009 by Mark Overmeer. # For other contributors see ChangeLog. # See the manual pages for details on the licensing terms. # Pod stripped from pm file by OODoc 1.05. package OODoc::Parser::Markov; use vars '$VERSION'; $VERSION = '1.05'; use base 'OODoc::Parser'; use strict; use warnings; use OODoc::Text::Chapter; use OODoc::Text::Section; use OODoc::Text::SubSection; use OODoc::Text::SubSubSection; use OODoc::Text::Subroutine; use OODoc::Text::Option; use OODoc::Text::Default; use OODoc::Text::Diagnostic; use OODoc::Text::Example; use OODoc::Manual; use Carp; use File::Spec; use IO::File; my $url_modsearch = "http://search.cpan.org/perldoc?"; my $url_coderoot = 'CODE'; #------------------------------------------- my @default_rules = ( [ '=cut' => 'docCut' ] , [ '=chapter' => 'docChapter' ] , [ '=section' => 'docSection' ] , [ '=subsection' => 'docSubSection' ] , [ '=subsubsection' => 'docSubSubSection' ] , [ '=method' => 'docSubroutine' ] , [ '=i_method' => 'docSubroutine' ] , [ '=c_method' => 'docSubroutine' ] , [ '=ci_method' => 'docSubroutine' ] , [ '=function' => 'docSubroutine' ] , [ '=tie' => 'docSubroutine' ] , [ '=overload' => 'docSubroutine' ] , [ '=option' => 'docOption' ] , [ '=default' => 'docDefault' ] , [ '=requires' => 'docRequires' ] , [ '=example' => 'docExample' ] , [ '=examples' => 'docExample' ] , [ '=error' => 'docDiagnostic' ] , [ '=warning' => 'docDiagnostic' ] , [ '=notice' => 'docDiagnostic' ] , [ '=debug' => 'docDiagnostic' ] # deprecated , [ '=head1' => 'docChapter' ] , [ '=head2' => 'docSection' ] , [ '=head3' => 'docSubSection' ] # problem spotter , [ qr/^(warn|die|carp|confess|croak)\s/ => 'debugRemains' ] , [ qr/^( sub \s+ \w | (?:my|our) \s+ [\($@%] | (?:package|use) \s+ \w+\: ) /x => 'forgotCut' ] ); sub init($) { my ($self, $args) = @_; $self->SUPER::init($args) or return; my @rules = @default_rules; unshift @rules, @{delete $args->{additional_rules}} if exists $args->{additional_rules}; $self->{OP_rules} = []; $self->rule(@$_) foreach @rules; $self; } #------------------------------------------- sub rule($$) { my ($self, $match, $action) = @_; push @{$self->{OP_rules}}, [$match, $action]; $self; } #------------------------------------------- sub findMatchingRule($) { my ($self, $line) = @_; foreach ( @{$self->{OP_rules}} ) { my ($match, $action) = @$_; if(ref $match) { return ($&, $action) if $line =~ $match; } elsif(substr($line, 0, length($match)) eq $match) { return ($match, $action); } } (); } sub parse(@) { my ($self, %args) = @_; my $input = $args{input} or croak "ERROR: no input file to parse specified"; my $output = $args{output} || File::Spec->devnull; my $version = $args{version} or confess; my $distr = $args{distribution} or confess; my $in = IO::File->new($input, 'r') or die "ERROR: cannot read document from $input: $!\n"; my $out = IO::File->new($output, 'w') or die "ERROR: cannot write stripped code to $output: $!\n"; # pure doc files have no package statement included, so it shall # be created beforehand. my ($manual, @manuals); my $pure_pod = $input =~ m/\.pod$/; if($pure_pod) { $manual = OODoc::Manual->new ( package => $self->filenameToPackage($input) , pure_pod => 1 , source => $input , parser => $self , distribution => $distr , version => $version ); push @manuals, $manual; $self->currentManual($manual); $self->inDoc(1); } else { $out->print($args{notice}) if $args{notice}; $self->inDoc(0); } # Read through the file. while(my $line = $in->getline) { my $ln = $in->input_line_number; if(!$self->inDoc && $line =~ s/^(\s*package\s*([\w\-\:]+)\;)//) { $out->print($1); my $package = $2; $out->print("\nuse vars '\$VERSION';\n\$VERSION = '$version';\n"); $out->print($line); $manual = OODoc::Manual->new ( package => $package , source => $input , stripped => $output , parser => $self , distribution => $distr , version => $version ); push @manuals, $manual; $self->currentManual($manual); } elsif(my($match, $action) = $self->findMatchingRule($line)) { if(ref $action) { $action->($self, $match, $line, $input, $ln) or $out->print($line); } else { no strict 'refs'; $self->$action($match, $line, $input, $ln) or $out->print($line); } } elsif($line =~ m/^=(over|back|item|for|pod|begin|end|head4)\b/ ) { ${$self->{OPM_block}} .= "\n". $line; $self->inDoc(1); } elsif(substr($line, 0, 1) eq '=') { warn "WARNING: unknown markup in $input line $ln:\n $line"; ${$self->{OPM_block}} .= $line; $self->inDoc(1); } elsif($pure_pod || $self->inDoc) { # add the line to the currently open text block my $block = $self->{OPM_block}; unless($block) { warn "WARNING: no block for line $ln in file $input\n $line"; my $dummy = ''; $block = $self->setBlock(\$dummy); } $$block .= $line; } elsif($line eq "__DATA__\n") # flush rest file { $out->print($line, $in->getlines); } else { $out->print($line); } } warn "WARNING: doc did not end in $input.\n" if $self->inDoc && ! $pure_pod; $self->closeChapter; $in->close && $out->close; @manuals; } #------------------------------------------- sub setBlock($) { my ($self, $ref) = @_; $self->{OPM_block} = $ref; $self->inDoc(1); $self; } #------------------------------------------- sub inDoc(;$) { my $self = shift; $self->{OPM_in_pod} = shift if @_; $self->{OPM_in_pod}; } #------------------------------------------- sub currentManual(;$) { my $self = shift; @_ ? $self->{OPM_manual} = shift : $self->{OPM_manual}; } #------------------------------------------- sub docCut($$$$) { my ($self, $match, $line, $fn, $ln) = @_; if($self->currentManual->isPurePod) { warn "The whole file $fn is pod, so =cut in line $ln is useless.\n"; return; } warn "WARNING: $match does not terminate any doc in $fn line $ln.\n" unless $self->inDoc; $self->inDoc(0); 1; } #------------------------------------------- # CHAPTER sub docChapter($$$$) { my ($self, $match, $line, $fn, $ln) = @_; $line =~ s/^\=(chapter|head1)\s+//; $line =~ s/\s+$//; $self->closeChapter; my $manual = $self->currentManual; die "ERROR: chapter $line before package statement in $fn line $ln\n" unless defined $manual; my $chapter = $self->{OPM_chapter} = OODoc::Text::Chapter->new ( name => $line , manual => $manual , linenr => $ln ); $self->setBlock($chapter->openDescription); $manual->chapter($chapter); $chapter; } sub closeChapter() { my $self = shift; my $chapter = delete $self->{OPM_chapter} or return; $self->closeSection()->closeSubroutine(); } #------------------------------------------- # SECTION sub docSection($$$$) { my ($self, $match, $line, $fn, $ln) = @_; $line =~ s/^\=(section|head2)\s+//; $line =~ s/\s+$//; $self->closeSection; my $chapter = $self->{OPM_chapter}; die "ERROR: section `$line' outside chapter in $fn line $ln\n" unless defined $chapter; my $section = $self->{OPM_section} = OODoc::Text::Section->new ( name => $line , chapter => $chapter , linenr => $ln ); $chapter->section($section); $self->setBlock($section->openDescription); $section; } sub closeSection() { my $self = shift; my $section = delete $self->{OPM_section} or return $self; $self->closeSubSection(); } #------------------------------------------- # SUBSECTION sub docSubSection($$$$) { my ($self, $match, $line, $fn, $ln) = @_; $line =~ s/^\=(subsection|head3)\s+//; $line =~ s/\s+$//; $self->closeSubSection; my $section = $self->{OPM_section}; defined $section or die "ERROR: subsection `$line' outside section in $fn line $ln\n"; my $subsection = $self->{OPM_subsection} = OODoc::Text::SubSection->new ( name => $line , section => $section , linenr => $ln ); $section->subsection($subsection); $self->setBlock($subsection->openDescription); $subsection; } sub closeSubSection() { my $self = shift; my $subsection = delete $self->{OPM_subsection}; $self; } #------------------------------------------- # SUBSECTION sub docSubSubSection($$$$) { my ($self, $match, $line, $fn, $ln) = @_; $line =~ s/^\=(subsubsection|head4)\s+//; $line =~ s/\s+$//; $self->closeSubSubSection; my $subsection = $self->{OPM_subsection}; defined $subsection or die "ERROR: subsubsection `$line' outside subsection in $fn line $ln\n"; my $subsubsection = $self->{OPM_subsubsection} = OODoc::Text::SubSubSection->new ( name => $line , subsection => $subsection , linenr => $ln ); $subsection->subsubsection($subsubsection); $self->setBlock($subsubsection->openDescription); $subsubsection; } sub closeSubSubSection() { my $self = shift; delete $self->{OPM_subsubsection}; $self; } #------------------------------------------- # SUBROUTINES sub docSubroutine($$$$) { my ($self, $match, $line, $fn, $ln) = @_; chomp $line; $line =~ s/^\=(\w+)\s+//; my $type = $1; my ($name, $params) = $type eq 'overload' ? ($line, '') : $line =~ m/^(\w*)\s*(.*?)\s*$/; my $container = $self->{OPM_subsection} || $self->{OPM_section} || $self->{OPM_chapter}; die "ERROR: subroutine $name outside chapter in $fn line $ln\n" unless defined $container; $type = 'i_method' if $type eq 'method'; my $sub = $self->{OPM_subroutine} = OODoc::Text::Subroutine->new ( type => $type , name => $name , parameters => $params , linenr => $ln , container => $container ); $self->setBlock($sub->openDescription); $container->addSubroutine($sub); $sub; } sub closeSubroutine() { my $self = shift; delete $self->{OPM_subroutine}; $self; } #------------------------------------------- # SUBROUTINE ADDITIONALS sub docOption($$$$) { my ($self, $match, $line, $fn, $ln) = @_; unless($line =~ m/^\=option\s+(\S+)\s*(.+?)\s*$/ ) { warn "WARNING: option line incorrect in $fn line $ln:\n$line"; return; } my ($name, $parameters) = ($1, $2); my $sub = $self->{OPM_subroutine}; die "ERROR: option $name outside subroutine in $fn line $ln\n" unless defined $sub; my $option = OODoc::Text::Option->new ( name => $name , parameters => $parameters , linenr => $ln , subroutine => $sub ); $self->setBlock($option->openDescription); $sub->option($option); $sub; } #------------------------------------------- sub docDefault($$$$) { my ($self, $match, $line, $fn, $ln) = @_; unless($line =~ m/^\=default\s+(\S+)\s*(.+?)\s*$/ ) { warn "WARNING: default line incorrect in $fn line $ln:\n$line"; return; } my ($name, $value) = ($1, $2); my $sub = $self->{OPM_subroutine}; die "ERROR: default for option $name outside subroutine in $fn line $ln\n" unless defined $sub; my $default = OODoc::Text::Default->new ( name => $name , value => $value , linenr => $ln , subroutine => $sub ); $sub->default($default); $sub; } sub docRequires($$$$) { my ($self, $match, $line, $fn, $ln) = @_; unless($line =~ m/^\=requires\s+(\w+)\s*(.+?)\s*$/ ) { warn "WARNING: requires line incorrect in $fn line $ln:\n$line"; return; } my ($name, $param) = ($1, $2); $self->docOption ($match, "=option $name $param", $fn, $ln); $self->docDefault($match, "=default $name ", $fn, $ln); } #------------------------------------------- # DIAGNOSTICS sub docDiagnostic($$$$) { my ($self, $match, $line, $fn, $ln) = @_; $line =~ s/^\=(\w+)\s*//; my $type = $1; $line =~ s/\s*$//; unless(length $line) { warn "WARNING: no diagnostic message supplied in $fn line $ln"; return; } my $sub = $self->{OPM_subroutine}; die "ERROR: diagnostic $type outside subroutine in $fn line $ln\n" unless defined $sub; my $diag = OODoc::Text::Diagnostic->new ( type => ucfirst($type) , name => $line , linenr => $ln , subroutine => $sub ); $self->setBlock($diag->openDescription); $sub->diagnostic($diag); $sub; } #------------------------------------------- # EXAMPLE sub docExample($$$$) { my ($self, $match, $line, $fn, $ln) = @_; $line =~ s/^=examples?\s*//; $line =~ s/^\#.*//; my $container = $self->{OPM_subroutine} || $self->{OPM_subsubsection} || $self->{OPM_subsection} || $self->{OPM_section} || $self->{OPM_chapter}; die "ERROR: example outside chapter in $fn line $ln\n" unless defined $container; my $example = OODoc::Text::Example->new ( name => ($line || '') , linenr => $ln , container => $container ); $self->setBlock($example->openDescription); $container->example($example); $example; } #------------------------------------------- sub debugRemains($$$$) { my ($self, $match, $line, $fn, $ln) = @_; warn "WARNING: Debugging remains in $fn line $ln\n" unless $self->inDoc || $self->currentManual->isPurePod; undef; } #------------------------------------------- sub forgotCut($$$$) { my ($self, $match, $line, $fn, $ln) = @_; warn "WARNING: You may have accidentally captured code in doc $fn line $ln\n" if $self->inDoc && ! $self->currentManual->isPurePod; undef; } #------------------------------------------- sub decomposeM($$) { my ($self, $manual, $link) = @_; my ($subroutine, $option) = $link =~ s/(?:^|\:\:) (\w+) \( (.*?) \)$//x ? ($1, $2) : ('', ''); my $man; if(not length($link)) { $man = $manual } elsif(defined($man = $self->manual($link))) { ; } else { eval "no warnings; require $link"; if( ! $@ || $@ =~ m/attempt to reload/i || $self->skipManualLink($link) ) { ; } elsif($@ =~ m/Can't locate/ ) { warn "WARNING: module $link is not on your system, found in $manual\n"; } else { $@ =~ s/ at \(eval.*//; warn "WARNING: use problem for module $link in $manual;\n$@"; warn " Did you use an 'M' tag on something which is not a module?\n"; } $man = $link; } unless(ref $man) { return ( $manual , $man . (length($subroutine) ? " subroutine $subroutine" : '') . (length($option) ? " option $option" : '') ); } return (undef, $man) unless defined $subroutine && length $subroutine; my $package = $self->manual($man->package); unless(defined $package) { my $want = $man->package; warn "WARNING: no manual for $want (correct casing?)\n"; return (undef, "$want subroutine $subroutine"); } my $sub = $package->subroutine($subroutine); unless(defined $sub) { warn "WARNING: subroutine $subroutine() is not defined by $package, but linked to in $manual\n"; return ($package, "$package subroutine $subroutine"); } my $location = $sub->manual; return ($location, $sub) unless defined $option && length $option; my $opt = $sub->findOption($option); unless(defined $opt) { warn "WARNING: option \"$option\" unknown for $subroutine() in $location, found in $manual\n"; return ($location, "$package subroutine $subroutine option $option"); } ($location, $opt); } sub decomposeL($$) { my ($self, $manual, $link) = @_; my $text = $link =~ s/^([^|]*)\|// ? $1 : undef; unless(length $link) { warn "WARNING: empty L link in $manual"; return (); } if($link =~ m/^[a-z]+\:[^:]/ ) { $text = $link unless defined $text; return (undef, undef, $link, $text); } my ($name, $item) = $link =~ m[(.*?)(?:/(.*))?$]; ($name, $item) = (undef, $name) if $name =~ m/^".*"$/; $item =~ s/^"(.*)"$/$1/ if defined $item; my $man = length $name ? ($self->manual($name) || $name) : $manual; my $dest; if(!ref $man) { unless(defined $text && length $text) { $text = "manual $man"; $text .= " entry $item" if defined $item && length $item; } if($man !~ m/\(\d.*\)\s*$/) { (my $escaped = $man) =~ s/\W+/_/g; $dest = "$url_modsearch$escaped"; } } elsif(!defined $item) { $dest = $man; $text = $man->name unless defined $text; } elsif(my @obj = $man->all(findEntry => $item)) { $dest = shift @obj; $text = $item unless defined $text; } else { warn "WARNING: Manual $manual links to unknown entry \"$item\" in $man\n"; $dest = $man; $text = "$man" unless defined $text; } ($man, $dest, undef, $text); } sub cleanupPod($$$) { my ($self, $formatter, $manual, $string) = @_; return '' unless defined $string && length $string; my @lines = split /^/, $string; my $protect; for(my $i=0; $i < @lines; $i++) { $protect = $1 if $lines[$i] =~ m/^=(for|begin)\s+\w/; undef $protect if $lines[$i] =~ m/^=end/; undef $protect if $lines[$i] =~ m/^\s*$/ && $protect && $protect eq 'for'; next if $protect; $lines[$i] =~ s/\bM\<([^>]*)\>/$self->cleanupPodM($formatter,$manual,$1)/ge; $lines[$i] =~ s/\bL\<([^>]*)\>/$self->cleanupPodL($formatter,$manual,$1)/ge if substr($lines[$i], 0, 1) eq ' '; # permit losing blank lines around pod statements. if(substr($lines[$i], 0, 1) eq '=') { if($i > 0 && $lines[$i-1] ne "\n") { splice @lines, $i-1, 0, "\n"; $i++; } elsif($i < $#lines && $lines[$i+1] ne "\n" && substr($lines[$i], 0, 5) ne "=for ") { splice @lines, $i+1, 0, "\n"; } } else { $lines[$i] =~ s/^\\\=/\=/; } # Remove superfluous blanks if($i < $#lines && $lines[$i] eq "\n" && $lines[$i+1] eq "\n") { splice @lines, $i+1, 1; } } # remove leading and trailing blank lines shift @lines while @lines && $lines[0] eq "\n"; pop @lines while @lines && $lines[-1] eq "\n"; @lines ? join('', @lines) : ''; } sub cleanupPodM($$$) { my ($self, $formatter, $manual, $link) = @_; my ($toman, $to) = $self->decomposeM($manual, $link); ref $to ? $formatter->link($toman, $to, $link) : $to; } sub cleanupPodL($$$) { my ($self, $formatter, $manual, $link) = @_; my ($toman, $to, $href, $text) = $self->decomposeL($manual, $link); $text; } #------------------------------------------- sub cleanupHtml($$$;$) { my ($self, $formatter, $manual, $string, $is_html) = @_; return '' unless defined $string && length $string; if($string =~ m/(?:\A|\n) # start of line \=begin\s+(:?\w+)\s* # begin statement (.*?) # encapsulated \n\=end\s+\1\s* # related end statement /xs || $string =~ m/(?:\A|\n) # start of line \=for\s+(:?\w+)\b # for statement (.*?)\n # encapsulated (\n|\Z) # end of paragraph /xs ) { my ($before, $type, $capture, $after) = ($`, lc($1), $2, $'); if($type =~ m/^\:(text|pod)\b/ ) { $type = 'text'; $capture = $self->cleanupPod($formatter, $manual, $capture); } elsif($type =~ m/^\:html\b/ ) { $type = 'html'; $capture = $self->cleanupHtml($formatter, $manual, $capture, 1); } my $take = $type eq 'text' ? "
\n". $capture . "
\n" : $type eq 'html' ? $capture : ''; # ignore return $self->cleanupHtml($formatter, $manual, $before) . $take . $self->cleanupHtml($formatter, $manual, $after); } for($string) { unless($is_html) { s#\&#\&#g; s#(?#-\>#g; } s/\bM\<([^>]*)\>/$self->cleanupHtmlM($formatter, $manual, $1)/ge; s/\bL\<([^>]*)\>/$self->cleanupHtmlL($formatter, $manual, $1)/ge; s#\bF\<([^>]*)\>#$1#g; s#\bC\<([^>]*)\>#$1#g; s#\bI\<([^>]*)\>#$1#g; s#\bB\<([^>]*)\>#$1#g; s#\bE\<([^>]*)\>#\&$1;#g; s#^\=over\s+\d+\s*#\n#gms; s#^=pod\b##gm; my ($label, $level, $title); s#^\=head([1-6])\s*([^\n]*)# ($title, $level) = ($1, $2); $label = $title; $label =~ s/\W+/_/g; qq[$title]; #ge; next if $is_html; s!(?:(?:^|\n) [^\ \t\n]+[^\n]* # line starting with blank: para )+ !

$&

!gsx; s!(?:(?:^|\n) # start of line [\ \t]+[^\n]+ # line starting with blank: pre )+ !
$&\n
!gsx; s#\n
##gs;
        s#

\n#\n

#gs; } $string; } sub cleanupHtmlM($$$) { my ($self, $formatter, $manual, $link) = @_; my ($toman, $to) = $self->decomposeM($manual, $link); ref $to ? $formatter->link($toman, $to, $link) : $to; } sub cleanupHtmlL($$$) { my ($self, $formatter, $manual, $link) = @_; my ($toman, $to, $href, $text) = $self->decomposeL($manual, $link); defined $href ? qq[$text] : !defined $to ? $text : ref $to ? $formatter->link($toman, $to, $text) : qq[$text] } #------------------------------------------- 1;