The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
package App::DualLivedDiff;
use strict;
use warnings;

our $VERSION = '1.07';

use Getopt::Long;
use Parse::CPAN::Meta ();
use LWP::Simple;
use File::Temp ();
use File::Spec;
use Archive::Extract;
use File::Find ();
use CPAN ();

our $diff_cmd = 'diff';
our @exclude_files = (
  qr(\.{1,2}$),
  qr(\.svn$),
  qr(\.git$),
);

sub usage {
  print "@_\n\n" if @_;
  print <<HERE;
Usage: $0 -d source-dist -b /path/to/blead/checkout
Does a diff FROM a dual lived module distribution TO blead perl

-b/--blead   blead perl path
-d/--dual    dual lived module distribution path, archive, URL,
             or module- or distribution name (default: .)
-r/--reverse reverses the diff (blead to lib)
-c/--config  name of the configuration file with file mappings
             (defaults to .dualLivedDiffConfig in the module path or current path)
-o/--output  file name for output (defaults to STDOUT)
             useful to separate diff from CPAN.pm output
-p/--paths   show the "dual"-lived module paths or the "blead" paths?
             defaults to "blead" or to "dual" if in --reverse mode
-w/--ignore-all-space (same as for normal gnu diff)

Check perldoc "App::DualLivedDiff" for more info on the usage.
The "diff" command is assumed to be in your PATH and will be run with the
-u and -N options by default.
HERE
  exit(1);
}

my (
  $bleadpath, $dualmodule, $reverse,
  $default_config_file, $config_file,
  $output_file, $paths, $ignore_space
);

sub run {
  $bleadpath           = undef;
  $dualmodule          = '.';
  $reverse             = 0;
  $default_config_file = '.dualLivedDiffConfig';
  $config_file         = $default_config_file;
  $output_file         = undef;
  $paths               = undef;
  $ignore_space        = undef;
  GetOptions(
    'b|blead=s'                  => \$bleadpath,
    'h|help'                     => \&usage,
    'r|reverse'                  => \$reverse,
    'd|dual=s'                   => \$dualmodule,
    'c|conf|config|configfile=s' => \$config_file,
    'o|out|output=s'             => \$output_file,
    'p|path|paths=s'             => \$paths,
    'w|ignore-all-space'         => \$ignore_space,
  );

  if (defined $output_file) {
    open my $fh, '>', $output_file or die "Could not open file '$output_file' for writing: $!";
    $output_file = $fh;
  }

  usage() if not defined $bleadpath or not -d $bleadpath;

  if (not defined $paths) {
    $paths = $reverse ? "blead" : "dual";
  }
  else {
    if ($paths =~ /^blead$/i) {
      $paths = 'blead';
    } elsif ($paths =~ /^dual$/i) {
      $paths = 'dual';
    } else {
      die "Invalid path setting: --paths must be either 'dual' or 'blead'!\n";
    }
  }

  my $workdir         = get_dual_lived_distribution_dir($dualmodule);
  my $config          = get_config($workdir, $config_file);

  my $files           = $config->{"files"} || {};
  my $exclude_regexes = [ map {qr/$_/} @{$config->{"exclude-regexes"} || []} ];
  my $dirs_flat       = $config->{"dirs-flat"} || {};
  my $dirs_recursive  = $config->{"dirs-recursive"} || {};
  
  my $blead_module_base_path = $config->{"base-path-in-blead"};

  my $pathspec = {
    blead_path => $bleadpath,
    source_path => $workdir,
    blead_module_path => $blead_module_base_path,
  };

  foreach my $source_file (keys %$files) {
    # commented out since explicitly mapped files trump exclusion
    #if (grep {$source_file =~ $_} @$exclude_regexes) { next; }
    my $blead_file = $files->{$source_file};
    $pathspec->{blead_file} = $blead_file;
    $pathspec->{source_file} = $source_file;

    my $absolute_source_file = File::Spec->catdir($workdir, $source_file);

    if (-f $absolute_source_file) {
      file_diff( $output_file, $pathspec, $paths );
    }
    elsif (-d $absolute_source_file) {
      warn "'$absolute_source_file' is not a file but a directory. Use the 'dirs-flat' or 'dirs-recursive' config options instead!";
      next;
    }
    else {
      warn "Explicitly mapped file '$source_file' missing from dual lived module source tree!";
      next;
    }
  }

  foreach my $source_dir (keys %$dirs_flat) {
    if (grep {$source_dir =~ $_} @$exclude_regexes) {
      warn "Explicitly mapped directory '$source_dir' is also excluded explicitly. Skipping it.";
      next;
    }
    my $blead_dir = $dirs_flat->{$source_dir};
    $pathspec->{blead_file} = $blead_dir;
    $pathspec->{source_file} = $source_dir;

    my $absolute_source_dir = File::Spec->catdir($workdir, $source_dir);
    if (-f $absolute_source_dir) {
      warn "'$absolute_source_dir' is not a directory but a file. Use the 'files' config option instead!";
      next;
    }
    elsif (-d $absolute_source_dir) {
      dir_diff( $output_file, $pathspec, $paths, 0, $exclude_regexes );
    }
    else {
      warn "Explicitly mapped directory '$source_dir' missing from dual lived module source tree!";
      next;
    }
  }

  foreach my $source_dir (keys %$dirs_recursive) {
    if (grep {$source_dir =~ $_} @$exclude_regexes) {
      warn "Explicitly mapped directory '$source_dir' is also excluded explicitly. Skipping it.";
      next;
    }
    my $blead_dir = $dirs_recursive->{$source_dir};
    $pathspec->{blead_file} = $blead_dir;
    $pathspec->{source_file} = $source_dir;

    my $absolute_source_dir = File::Spec->catdir($workdir, $source_dir);
    if (-f $absolute_source_dir) {
      warn "'$absolute_source_dir' is not a directory but a file. Use the 'files' config option instead!";
      next;
    }
    elsif (-d $absolute_source_dir) {
      dir_diff( $output_file, $pathspec, $paths, 1, $exclude_regexes );
    }
    else {
      warn "Explicitly mapped directory '$source_dir' missing from dual lived module source tree!";
      next;
    }
  }
}

# given a source specification, get the path to an extracted distribution
sub get_dual_lived_distribution_dir {
  my $source = shift;
  usage("Bad source of the dual lived module distribution '$source'")
    if not defined $source;
  
  my $distfile;
  if (-d $source) {
    # already extracted or checkout
    return $source;
  }
  elsif (-f $source) {
    # distribution file
    $distfile = $source;
  }
  elsif ($source =~ m{^(?:ftp|https?)://}) {
    $distfile = download_distribution($source);
  }
  elsif ($source =~ m{^[^:/]+://}) {
    die "Support for VCS checkout and fancy protocols not implemented";
  }
  else {
    # fallback, treat as module or distribution
    my $url = module_or_dist_to_url($source);
    die "Could not find CPAN module of that name ($source)" if not defined $url;
    $distfile = download_distribution($url);
  }

  # extract distribution
  my $tmpdir = File::Temp::tempdir( CLEANUP => 1 );
  my $ae = Archive::Extract->new( archive => $distfile );
  $ae->extract( to => $tmpdir )
    or die "Failed to extract distribution '$distfile' to temp. dir: " . $ae->error();

  # find the extracted distribution dir
  opendir my $dh, $tmpdir
    or die "Could not opendir '$tmpdir': $!";
  my @stuff = readdir($dh);
  my @files = grep {-f File::Spec->catfile($tmpdir, $_)} @stuff;
  my @dirs  = grep {!/^\.\.?$/ and -d File::Spec->catdir($tmpdir, $_)} @stuff;
  closedir $dh;

  if (@files or @dirs != 1) {
    die "Failed to find extracted distribution directory in '$tmpdir'. Found ".scalar(@files)." files and ".scalar(@dirs)." dirs";
  }

  return File::Spec->catdir($tmpdir, shift(@dirs)); 
}

sub download_distribution {
  my $url = shift;
  my $disttmpdir = File::Temp::tempdir( CLEANUP => 1 );
  $url =~ m{/([^/]+)$} or die;
  my $file = File::Spec->catfile($disttmpdir, $1);
  if (is_success(getstore( $url, $file ))) {
    return $file;
  }
  else {
    die "Could not fetch '$url'";
  }
}

# find and load the configuration file
sub get_config {
  my $source_dir = shift;
  my $config_file = shift;
  my $yaml;
  if (-f $config_file) {
    $yaml = Parse::CPAN::Meta::LoadFile($config_file);
  }
  elsif ( -f File::Spec->catfile($source_dir, $config_file) ) {
    $yaml = Parse::CPAN::Meta::LoadFile(
      File::Spec->catfile($source_dir, $config_file)
    );
  }
  elsif ( -f File::Spec->catfile($source_dir, $default_config_file) ) {
    $yaml = Parse::CPAN::Meta::LoadFile(
      File::Spec->catfile($source_dir, $default_config_file)
    );
  }
  else {
    die "Could not find nor load configuration file";
  }

  $yaml = $yaml->[0] if ref($yaml) eq 'ARRAY';

  return $yaml;
}

# given the two base dirs and two relative paths, transform a
# directory mapping into file mappings recursively
sub dirs_to_filemapping {
  my $pathspec        = shift;
  my $recursive       = shift;
  
  my $full_source_dir = File::Spec->catdir($pathspec->{source_path}, $pathspec->{source_file});
  my $full_blead_dir  = get_full_blead_path($pathspec, $pathspec->{blead_file});

  if (not -d $full_blead_dir) {
    warn "Specified directory '$pathspec->{blead_file}' could not be found in blead perl source tree (as $full_blead_dir)!";
    return();
  }
  if (not -d $full_source_dir) {
    warn "Specified directory '$pathspec->{source_file}' could not be found in dual lived module source tree (as $full_source_dir)!";
    return();
  }

  my @source_files = $recursive ? recur_get_all_files($full_source_dir) : get_all_files($full_source_dir);
  if (!@source_files) {
    warn "Specified source directory '$pathspec->{source_file}' does not contain any files!";
    return({});
  }

  my $mapping = {};
  $mapping->{File::Spec->catfile($pathspec->{source_file}, $_)} = File::Spec->catfile($pathspec->{blead_file}, $_)
    for @source_files;

  return $mapping;
}

# get all files in a path with relative paths
sub recur_get_all_files {
  my $path = shift;

  my @files;
  return() if not -d $path;
  
  File::Find::find(
    {
      preprocess => sub {
        my @return;
        FILE: foreach my $file (@_) {
          foreach my $exclude_regex (@exclude_files) {
            next FILE if $file =~ $exclude_regex;
          }
          push @return, $file;
        }
        return(@return);
      },
      wanted => sub {
        foreach my $exclude_regex (@exclude_files) {
          return if $_ =~ $exclude_regex;
        }
        return unless -f $_;
        s{^\Q$path\E[\\/]*}{};
        push @files, $_;
      },
      no_chdir => 1,
    },
    $path
  );
  return(@files);
}

# get all files in a path with relative paths
sub get_all_files {
  my $path = shift;

  return() if not -d $path;
  
  opendir my $dh, $path or die "Could not open path '$path': $!";
  my @files = readdir($dh);
  closedir $dh;

  my @use_files;
  FILE: foreach my $file (@files) {
    foreach my $exclude_regex (@exclude_files) {
      next FILE if $file =~ $exclude_regex;
    }
    push @use_files, $file if -f File::Spec->catfile($path, $file);
  }

  return(@use_files);
}

# produce the diff of a full directory
sub dir_diff {
  my $output_file     = shift;
  my $pathspec        = shift;
  my $paths           = shift;
  my $recursive       = shift;
  my $exclude_regexes = shift;

  my $map = dirs_to_filemapping( $pathspec, $recursive );

  foreach my $source_file (keys %$map) {
    next if grep {$source_file =~ $_} @$exclude_regexes;
    my $pathspec = {%$pathspec};
    $pathspec->{source_file} = $source_file;
    $pathspec->{blead_file} = $map->{$source_file};
    file_diff( $output_file, $pathspec, $paths );
  }
}

# produce the diff of a single file
sub file_diff {
  my $output_file     = shift;
  my $pathspec        = shift;
  my $paths           = shift;

  my $absolute_source_file = File::Spec->catfile($pathspec->{source_path}, $pathspec->{source_file});
  my $absolute_blead_file  = get_full_blead_path( $pathspec, $pathspec->{blead_file} );
  #warn "Diffing '$absolute_source_file' to '$absolute_blead_file'";

  my @cmd = ($diff_cmd, ($ignore_space ? ('-w') : ()), qw(-u -N));
  if ($reverse) {
    push @cmd, $absolute_blead_file, $absolute_source_file;
  }
  else {
    push @cmd, $absolute_source_file, $absolute_blead_file;
  }
  my $result = `@cmd`;
  my $blead_prefix = quotemeta($reverse ? '---' : '+++');
  my $source_prefix = quotemeta($reverse ? '+++' : '---');

  my $patched_filename;
  my $bleadpath_patched_filename =
    defined($pathspec->{blead_module_path})
    ? File::Spec->catfile( $pathspec->{blead_module_path}, $pathspec->{blead_file} )
    : $pathspec->{blead_file};

  if ($paths eq 'dual') {
    $patched_filename = $pathspec->{source_file};
  } elsif ($paths eq 'blead') {
    $patched_filename = $bleadpath_patched_filename;
   } else {
    $patched_filename = $reverse ? $bleadpath_patched_filename : $pathspec->{source_file};
  }
  #my $patched_filename = $reverse ? $source_file : $blead_file;

  #$result =~ s{^($blead_prefix\s*)(\S+)}{$1 . remove_path_prefix($2, $blead_base_dir)}gme;
  #$result =~ s{^($source_prefix\s*)(\S+)}{$1 . remove_path_prefix($2, $source_base_dir)}gme;
  
  $result =~ s{^($blead_prefix\s+)(\S+)}{$1 . $patched_filename}gme;
  $result =~ s{^($source_prefix\s+)(\S+)}{$1 . $patched_filename}gme;

  if (defined $output_file) {
    print $output_file $result;
  }
  else {
    print $result;
  }
}

# remove a prefix from a path
sub remove_path_prefix {
  my $path   = shift;
  my $prefix = shift;
  $path =~ s/^\Q$prefix\E//;
  $path =~ s/^[\/\\]+//;
  return $path;
}

# turn something that may look like a module or
# distribution into an URL using CPAN
sub module_or_dist_to_url {
  my $module_name = shift;
  #my $use_dev_versions = shift;

  my $distro;
  if ($module_name =~ /[\/.]/) {
    my $dist = CPAN::Shell->expandany($module_name);
    if (not defined $dist) {
      warn "Could not find distribution '$module_name' on CPAN\n";
      return();
    }
    $dist = $dist->distribution() if ref($dist) eq 'CPAN::Module';
    if (not ref($dist) eq 'CPAN::Distribution') {
      warn "Could not find distribution '$module_name' on CPAN\n";
      return();
    }
    $distro = $dist->pretty_id();
    warn "Assuming you specified a distribution name. Found the '$distro' distribution on CPAN\n";
  }
  else {
    my $module = CPAN::Shell->expand("Module", $module_name);
    if (not defined $module) {
      warn "Could not find module '$module_name' on CPAN\n";
      return();
    }
    $distro = $module->distribution()->pretty_id();
    warn "Assuming you specified a module name. Found the '$distro' distribution on CPAN\n";
  }

  $distro =~ /^([^\/]+)/ or die;
  $distro = substr($1, 0, 1) . "/" . substr($1, 0, 2) . "/" . $distro;

  my $mirrors = $CPAN::Config->{urllist};
  if (not defined $mirrors or not ref($mirrors) eq 'ARRAY' or not @$mirrors) {
    warn "Could not determine CPAN mirror";
    return();
  }

  my $url = $mirrors->[0];
  $url =~ s/\/+$//;
  return $url . '/authors/id/' . $distro;
}

sub get_full_blead_path {
  my $pathspec = shift;
  my $path = shift;
  if (defined $pathspec->{blead_module_path}) {
    return File::Spec->catdir($pathspec->{blead_path}, $pathspec->{blead_module_path}, $path);
  }
  else {
    return File::Spec->catdir($pathspec->{blead_path}, $path);
  }
}

1;
__END__

=head1 NAME

App::DualLivedDiff - Diff between the perl core and dual-lived modules' CPAN distributions

=head1 SYNOPSIS

Example: Filter::Simple.

Given a simple YAML file F<.dualLivedDiffConfig> in the current working directory
or the Filter::Simple CPAN distribution:

  ---
  exclude-regexes:
    - ^(?:\./)?MANIFEST$
    - ^(?:\./)?META.yml$
  files:
    lib/Filter/Simple.pm: lib/Filter/Simple.pm
    Changes: lib/Filter/Simple/Changes
  dirs-flat:
    t/: lib/Filter/Simple/t/
  dirs-recursive:
    t/lib/Filter/Simple/: t/lib/Filter/Simple/

By running the following command, you can get the diff between your blead perl checkout and
the CPAN distribution:

  dualLivedDiff --dual http://search.cpan.org/CPAN/authors/id/S/SM/SMUELLER/Filter-Simple-0.84.tar.gz --blead $HOME/perl-ssh

Or this if you have CPAN.pm configured:

  dualLivedDiff --dual SMUELLER/Filter-Simple-0.84.tar.gz --blead $HOME/perl-ssh

Or this if you want to search for a given module name:

  dualLivedDiff --dual Filter::Simple --blead $HOME/perl-ssh

You can use the C<base-path-in-blead> option in the YAML config file to set a base path
within the blead-perl checkout. Example of the configuration for the Attribute::Handlers
dual-lived module/distribution which lives entirely within F<ext/Attribute-Handlers>:

  ---
  base-path-in-blead: ext/Attribute-Handlers
  files:
    Changes: Changes
    README: README
  dirs-recursive:
    lib/: lib/
    t/: t/
    demo/: demo/

Run the "dualLivedDiff" program to get an overview of the command line options.

=head1 DESCRIPTION

Very early version of a tool to automatically generate diffs/patches between CPAN distributions
of dual lived Perl modules and the perl core. The code isn't beautiful. It's a hack.

=head1 AUTHOR

Steffen Mueller, E<lt>smueller@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2009 by Steffen Mueller

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8 or,
at your option, any later version of Perl 5 you may have available.

=cut