=head1 NAME: Rcs::Parser - Parse and analyze RCS files. =head1 SYNOPSIS: A basic RCS parser. This file does not rely upon any external utilities to parse RCS files. Currently it functions with RCS files generated by GNU CVS and as documented by the rcsfile(5) man page. =head1 EXAMPLE USAGE: Retrieve the most recent file from an archive: my $rcs = new Rcs::Parser; my $ret = $rcs->load($filename); my $current = $rcs->recent_version; To retrieve a specific version: my $rcs = new Rcs::Parser; my $ret = $rcs->load($filename); my $specific_version = $rcs->get('1.2'); =cut # shorthand document of memory structure # # - version # - body # - line # # - line # - origin # - new_lines # - line_map #************************************************************************* package Rcs::Parser; our $VERSION = '0.03'; use 5.006; use warnings; use strict; sub new { bless {}, $_[0] } =head1 METHODS: =head2 author($version) my $author = $rcs->author($version); The author method returns the author name of the given version. If no version is given it returns the author of the current loaded version. =cut sub author { my $self = shift @_; my $ver = shift @_ || $self->version; return $self->{rcs}->{$ver}->{author} || undef; } =head2 date($version) my $date = $rcs->date($version); The date method returns the revision date of the given version. If no version is given it returns the date of the current version. =cut sub date { my $self = shift @_; my $ver = shift @_ || $self->recent_version; return $self->{rcs}->{$ver}->{date} || undef; } =head2 get($version) my $ret = $rcs->get($version); This method causes the given version to be retrieved from the archive =cut sub get { my $self = shift @_; my $ver = shift @_ || $self->recent_version; return undef unless $self->{rcs}->{$ver}; my @chain = $self->_revision_path($self->{current_document}->{version},$ver); for my $version ( @chain ) { $self->_debug("--> loading delta $version"); $self->_apply_delta($version); $self->_sort; } return $self->_dump; } =head2 load($filename) my $ret = $rcs->load($filename); The load command reads in and parses the given filename. If the file does not exist or is unreadable by the script, undef is returned. Otherwise, 1 is returned upon success. =cut sub load { my $self = shift @_; $self->{file} = shift @_; return undef unless -f $self->{file}; my $doc_header; $self->{rcs} = {}; open RCSFILE, '<', $self->{file}; my @raw = ; close RCSFILE; $self->{rawfile} = \@raw; # The rawfile is steadily deleted as it is parsed $self->_parse_in_rcs($self->{rcs}); # populate the current doc $self->{current_document}->{version} = $self->recent_version; for my $line ( split /\n/, $self->{rcs}->{$self->recent_version}->{text} ) { push @{ $self->{current_document}->{body}->{1}->{new_lines} }, $self->_unquote($line) . "\n"; } # resort it $self->_sort; return 1; } =head2 load_scalar($data) my $ret = $rcs->load_scalar($data); The load command reads in and parses the provided RCS file in scalar form. 1 is returned upon success. =cut sub load_scalar { my $self = shift @_; my $data = shift @_; my $doc_header; $self->{rcs} = {}; my @raw = map { "$_\n" } split "\n", $data; $self->{rawfile} = \@raw; # The rawfile is steadily deleted as it is parsed $self->_parse_in_rcs($self->{rcs}); # populate the current doc $self->{current_document}->{version} = $self->recent_version; for my $line ( split /\n/, $self->{rcs}->{$self->recent_version}->{text} ) { push @{ $self->{current_document}->{body}->{1}->{new_lines} }, $self->_unquote($line) . "\n"; } # resort it $self->_sort; return 1; } =head2 notate() my $ret = $rcs->notate() This method builds and assembled statistical information for verions. =cut sub notate { my $self = shift @_; my $ver = $self->recent_version; my $note = {}; return undef unless $self->{rcs}->{$ver}; $self->_grab_note($note); my @chain = $self->_revision_path($self->{current_document}->{version}); for my $version ( @chain ) { $self->_debug("--> loading delta $version"); $self->_apply_delta($version); $self->_sort; $self->_grab_note($note); } for my $version ( reverse @chain, $self->recent_version ) { if ( $version eq $chain[$#chain] ) { $self->_debug("Mapping $version pro facia ..."); map { $note->{$version}->{body}->{$_}->{origin} = $version; } keys %{$note->{$version}->{body}}; } else { $self->_debug("Mapping $version via line count ..."); my $error; for my $line ( keys %{$note->{$version}->{body}} ) { my $author_ver = $version; my $test_line = $line; my $test_ver = $self->previous_version($version); $error++ unless defined $test_ver; while ( $note->{$test_ver}->{line_map}->{$test_line} ) { $author_ver = $test_ver; $test_line = $note->{$test_ver}->{line_map}->{$test_line}; $test_ver = $self->previous_version($author_ver) || last; #??? break if we bottom on version? } $note->{$version}->{body}->{$line}->{origin} = $author_ver; } warn "WARN: $error lines didn't map well for $self->{file} $version" if $error > 0; } } return $note; } sub _grab_note { my $self = shift @_; my $ref = shift @_; my $ver = $self->{current_document}->{version}; for my $line ( keys %{ $self->{current_document}->{body} } ) { $ref->{$ver}->{body}->{$line}->{line} = length $self->{current_document}->{body}->{$line}->{line}; $ref->{$ver}->{body}->{$line}->{origin} = $self->{current_document}->{body}->{$line}->{origin}; } for my $map ( keys %{ $self->{current_document}->{line_map} } ) { $ref->{$ver}->{line_map}->{$map} = $self->{current_document}->{line_map}->{$map}; } return 1; } =head2 all_versions() my @versions = $rcs->all_versions(); This method returns an array or arrayref of all versions stored in the RCS file. =cut sub all_versions { my $self = shift @_; our @versions; unless ( defined @versions ) { @versions = sort { my @a = split '\.', $a; my @b = split '\.', $b; $b[0] <=> $a[0] || $b[1] <=> $a[1] } grep !/header/, keys %{$self->{rcs}}; } return wantarray ? @versions : \@versions; } =head2 previous_version() my $ver = $rcs->previous_version(); This method returns the version previous to the currently instanced version. =cut sub previous_version { my $self = shift @_; my $ver = shift @_ || $self->recent_version; return $self->{rcs}->{revision_path}->{$ver}; } =head2 recent_version() my $ver = $rcs->recent_version(); This method returns the most current revision of the file. =cut sub recent_version { my $self = shift @_; return $self->{rcs}->{header}->{head}; } =head2 version() my $ver = $rcs->version(); This method returns the currently instanced version. =cut sub version { my $self = shift @_; return $self->{current_document}->{version}; } #### Private methods sub _apply_delta { my $self = shift @_; my $ver = shift @_; my $doc = shift @_ || $self->{current_document}; my $raw_delta = $self->{rcs}->{$ver}->{text}; $doc->{version} = $ver; my @deltas = split /\n/, $raw_delta; while ( @deltas ) { my $delta = shift @deltas; if ( $delta =~ /^a(\d+) (\d+)$/ ) { my $line = $1; my $count = $2; my $test = 0; for ( my $c = 0; $c < $count; $c++ ) { my $new_line = $self->_unquote( shift @deltas ) . "\n"; push @{ $doc->{body}->{$line}->{new_lines} }, $new_line; $test++; } $self->_debug("added $test lines at $line ($count directed)"); } elsif ( $delta =~ /^d(\d+) (\d+)$/ ) { my $first_line = $1; my $last_line = $1 + $2 - 1; for my $line ( $first_line .. $last_line ) { $doc->{body}->{$line}->{line} = undef; } $self->_debug("deleting lines $first_line through $last_line ($2 directed)"); } else { warn "ORPHAN DELTA COMMAND! $delta\n"; } } return 1; } sub _create_full_revision_path { my $self = shift @_; $self->_debug('Building full revision path for reference...'); $self->{rcs}->{revision_path} = {}; $self->{rcs}->{reverse_revision_path} = {}; my $version = $self->recent_version; while ( my $next = $self->{rcs}->{$version}->{next} ) { $self->{rcs}->{revision_path}->{$version} = $next; $self->_debug(" $version -> $next"); $version = $next; } #map { $self->{rcs}->{reverse_revision_path}->{$self->{rcs}->{revision_path}->{$_}} = $_ } keys %{$self->{rcs}->{revision_path}}; $self->_debug('Built a revision path of ' . scalar( keys %{$self->{rcs}->{revision_path}} ) . ' jumps.'); } sub _debug { my $self = shift @_; my $mesg = shift @_; print "DEBUG: $mesg\n" if $self->{debug}; } sub _dump { my $self = shift @_; my $doc = shift @_ || $self->{current_document}; return join '', map { $doc->{body}->{$_}->{line} } sort { $a <=> $b } keys %{ $doc->{body} }; } sub _parse_in_rcs { my $self = shift @_; my $rcs = shift @_; ### Parse in the RCS file header my $rcs_header; while ( my $line = shift @{$self->{rawfile}} ) { last if $line =~ /^$/; $rcs_header .= $line; } for my $chunk ( split /;/, $rcs_header ) { $chunk =~ s/\n//g; $chunk =~ /^\s*(\w+)\s*(.*)$/; $rcs->{header}->{$1} = $2 if $1; } $rcs_header = undef; ### Parse in the individual version headers my $version = 'error'; while ( my $line = shift @{$self->{rawfile}} ) { last if $line =~ /^desc$/; if ( $line =~ /^([\d|\.]+)/ ) { $version = $1; } else { next if $line =~ /^$/; $rcs->{$version}->{header} .= $line; } } my $garbage_double_quote = shift @{$self->{rawfile}}; ### Parse in the individual deltas $version = 'error'; my $directive = 0; my $quote = 0; while ( my $line = shift @{$self->{rawfile}} ) { $version = $1 if $line =~ /^([\d|\.]+)/ && $quote == 0; if ( $line =~ /^\@(?!\@)/ ) { $quote = $quote == 0 ? 1 : 0; } next if $line =~ /^$/ && $quote == 0; if ( $quote == 0 ) { $line =~ /^(.+)$/; $directive = $1; } else { $rcs->{$version}->{$directive} .= $line; } #$rcs{$version}{raw_text} .= $line; $self->_debug("Line for delta in $version : $line"); } ### clean the leftover quoting for my $version ( keys %$rcs ) { for my $directive ( keys %{$rcs->{$version}} ) { $rcs->{$version}->{$directive} = $1 if $rcs->{$version}->{$directive} =~ /^\@(.*)$/s; $rcs->{$version}->{$directive} = $1 if $rcs->{$version}->{$directive} =~ /^(.*)\@$/s; } } ### disassemble header for my $version ( keys %$rcs ) { my @commands; @commands = split ';', $rcs->{$version}->{header} if defined $rcs->{$version}->{header}; for my $command ( @commands ) { chomp $command; $rcs->{$version}->{$1} = $2 if $command =~ /\s*(.+)\s+(.+)$/; } } return 1; } sub _revision_path { my $self = shift @_; my $ver = shift @_ || $self->recent_version; my $stop = shift @_ || undef; return undef unless $self->{rcs}->{$ver}; return () if $ver eq $stop; $self->_debug('Checking revision path...'); $self->_create_full_revision_path() unless $self->{rcs}->{revision_path}; my @chain; while ( my $next = $self->{rcs}->{revision_path}->{$ver} ) { push @chain, $next; last if $next eq $stop; $ver = $next; } $self->_debug("CHAIN: " . (join ' -> ', @chain)); return @chain; } sub _sort { my $self = shift @_; my %copy = %{ $self->{current_document} }; $self->{current_document} = {}; $self->{current_document}->{version} = $copy{version}; my $count = 1; for my $line_num ( sort { $a <=> $b } keys %{ $copy{body} } ) { # add basic line if ( $copy{body}{$line_num}{line} ) { $self->{current_document}->{body}->{$count}->{line} = $copy{body}{$line_num}{line}; $self->{current_document}->{line_map}->{$count} = $line_num; $count++; } # add new lines if existant if ( $copy{body}{$line_num}{new_lines} ) { for my $line ( @{ $copy{body}{$line_num}{new_lines} } ) { $self->{current_document}->{body}->{$count}->{line} = $line; $count++; } } } %copy = (); $self->_debug( "($self->{current_document}->{version}) " . --$count . ' lines sorted...'); return 1; } sub _unquote { my $self = shift @_; my $in = shift @_; $in =~ s/\@\@/\@/g; return $in; } 1; =head1 KNOWN ISSUES: Beta Code: This code is beta. It has yet to fully understand binary formats stored in RCS and will treat them as text. Consquently, you'll see warnings. That being said, there shouldn't be any large scale bugs that will cause segfaulting or crashing. Only warnings. The RCS file format: There is an astounding lack of good documentation of the RCS format. About the only thing that can be found is the rcsfile(5) man page. The layout is mostly reverse engineered in this module. I have yet to have the time, or the skill and patience to disassemble the RCS portions of the code for GNU CVS. =head1 ERATTA: Q: Why 'Rcs' and not 'RCS' A: Because the any directory named 'RCS' is usually ignored by most versioning software and some developer tools. =head1 AUTHORSHIP: Rcs::Parser v0.02 2004/04/02 (c) 2001-2008, Phillip Pollard Released under the Perl Artistic License