#!/usr/bin/env perl # ABSTRACT: analyze several HTML documents based on the same template # PODNAME: untemplate use 5.010; use strict; use utf8::all; use warnings qw(all); use Carp qw(croak); use File::Basename; use File::Temp; use Getopt::Long; use HTML::Linear; use IO::Interactive qw(is_interactive); use Class::Load qw(try_load_class); use Pod::Usage; use Term::ANSIColor qw(:constants); use Tie::IxHash; ## no critic (ProhibitDeepNests, ProhibitPackageVars) our $VERSION = '0.016'; # VERSION GetOptions( q(help) => \my $help, q(color!) => \my $color, q(html!) => \my $html, q(encoding=s) => \my $encoding, q(partial!) => \my $partial, q(shrink!) => \my $shrink, q(strict!) => \my $strict, q(unmangle=s) => \my @unmangle, ) or pod2usage(q(-verbose) => 1); pod2usage(q(-verbose) => 1) if $help or $#ARGV < 1; $color //= is_interactive(*STDOUT); if ($html) { (%HTML::Linear::Path::xpath_wrap) = (%{$HTML::Linear::Path::Colors::scheme{html}}); $color = 0; print $HTML::Linear::Path::Colors::html[0]; } elsif ($color) { (%HTML::Linear::Path::xpath_wrap) = (%{$HTML::Linear::Path::Colors::scheme{terminal}}); $html = 0; } try_load_class('YADA') and fetch_documents(); tie my %elem, 'Tie::IxHash'; parse_files(\%elem); tie my %xpath, 'Tie::IxHash'; build_xpath(\%elem, \%xpath); for my $xpath (keys %xpath) { dump_diffs($xpath, \%xpath); } print $HTML::Linear::Path::Colors::html[1] if $html; sub fetch_documents { my (@local, @remote); for (@ARGV) { if (m{^https?://}x) { push @remote, $_; } else { push @local, $_; } } return unless @remote; ## no critic (RequireLocalizedPunctuationVars) @ARGV = @local; my $q = YADA->new; for (@remote) { my $tmp = File::Temp->new( SUFFIX => '.html', TEMPLATE => 'doc-XXXX', TMPDIR => 1, ); $q->append(sub { YADA::Worker->new({ initial_url => $_, on_init => sub { $_[0]->setopt(writedata => $tmp); }, on_finish => sub { $tmp->flush; push @ARGV, $tmp unless $_[0]->has_error; }, }) }); } $q->wait; return; } sub parse_files { my ($elem) = @_; for my $file (@ARGV) { my $hl = HTML::Linear->new; $hl->set_shrink if $shrink // 1; $hl->set_strict if $strict // 0; open(my $fh, '<:' . ($encoding ? "encoding($encoding)" : 'utf8' ), $file) or croak "Can't open $file: $!"; $hl->parse_file($fh); close $fh; push @{$elem->{$_}}, [ $_ => basename($file) ] for $hl->as_list; } return; } sub build_xpath { my ($elem, $xpath) = @_; while (my ($key, $list) = each %$elem) { for (@{$list}) { my ($el, $file) = @{$_}; if (@unmangle) { for my $path (@{$el->path}) { for my $attr (keys %{$path->attributes}) { ## no critic (ProtectPrivateSubs) next unless HTML::Linear::Path::_isgroup($el->path->[-1], $attr); for my $unmangle (@unmangle) { $path->attributes->{$attr} =~ s/$unmangle//x; } } } } my $hash = $el->as_hash; ++$xpath->{$_}->{$hash->{$_}}{$file} for keys %{$hash}; } } return; } sub dump_diffs { my ($xpath, $xpath_ref) = @_; my %file; my $m = 0; my $n = 0; for my $p (keys %{$xpath_ref->{$xpath}}) { for my $q (keys %{$xpath_ref->{$xpath}->{$p}}) { push @{$file{$q}}, $p; ++$m; } ++$n; } my $flag = 0; $flag = 1 if $n == $m / scalar @ARGV; $flag = 1 if not ($partial // 0) and scalar keys %file != scalar @ARGV; return if $flag; if (1 < scalar keys %file) { if ($html) { say '