package LUGS::Events::Parser::Filter; use strict; use warnings; use boolean qw(true); use HTML::Entities qw(decode_entities); use HTML::Parser (); our $VERSION = '0.01'; my (@tags, @stack); sub _init_parser { my $self = shift; my $parser = HTML::Parser->new( api_version => 3, start_h => [ \&_start_tag, 'tagname,attr,attrseq' ], text_h => [ \&_text_tag, 'dtext' ], end_h => [ \&_end_tag, 'tagname' ], ); return $parser; } sub _parse_html { my $self = shift; my ($chunk, $html) = @_; $self->{parser}->parse($chunk); undef @stack; return unless scalar @tags; @$html = @tags; undef @tags; } sub _eof_parser { my $self = shift; $self->{parser}->eof; } sub _start_tag { my ($tagname, $attr, $attrseq) = @_; push @stack, { name => $tagname, attr => $attr, attrseq => $attrseq }; } sub _text_tag { my ($dtext) = @_; return unless scalar @stack; $stack[-1]->{text} = $dtext; } sub _end_tag { my ($tagname) = @_; return unless scalar @stack; if ($stack[-1]->{name} eq $tagname) { push @tags, { $tagname => { map { $_ => $stack[-1]->{$_} } qw(text attr attrseq), }, }; pop @stack; } } sub _rewrite_tags { my $self = shift; my ($fields) = @_; foreach my $field (keys %{$fields->{_html}}) { my %rewritten; foreach my $html (@{$fields->{_html}->{$field}}) { foreach my $tag (keys %$html) { my @tagnames; if (scalar keys %{$html->{$tag}->{attr}}) { foreach my $attr (keys %{$html->{$tag}->{attr}}) { if (exists $self->{Tag_handlers}->{"$tag $attr"}) { push @tagnames, "$tag $attr"; } } } else { if (exists $self->{Tag_handlers}->{$tag}) { push @tagnames, $tag; } } foreach my $tagname (@tagnames) { foreach my $handler (@{$self->{Tag_handlers}->{$tagname}}) { if ($self->_field_rewrite($field, $handler)) { unless (exists $rewritten{$tagname}) { $rewritten{$tagname} = true; } my $subst = $handler->{rewrite}; foreach my $subst_item ($self->_subst_data($html, $tag)) { next unless defined $subst_item->[1]; my ($identifier, $replacement) = @$subst_item; my $place_holder = uc $identifier; $subst =~ s/\$$place_holder/$replacement/; } my $re = $self->_subst_pattern($html, $tag); my ($text) = $fields->{$field} =~ $re; my $replace = defined $html->{$tag}->{text} ? $subst : $text; $fields->{$field} =~ s{$re}{$replace}; } } } } } foreach my $tagname (grep !$rewritten{$_}, keys %{$self->{Tag_handlers}}) { foreach my $handler (@{$self->{Tag_handlers}->{$tagname}}) { if ($self->_field_rewrite($field, $handler)) { if ($tagname !~ /\b\s+?\b/ && $fields->{$field} =~ m{<$tagname>} && $fields->{$field} !~ m{} ) { my $subst = $handler->{rewrite}; $fields->{$field} =~ s{<$tagname>}{$subst}g; } } } } } } sub _strip_text { my $self = shift; my ($fields) = @_; foreach my $field (grep !/^\_/, keys %$fields) { foreach my $item (@{$self->{Strip_text}}) { $fields->{$field} =~ s/\Q$item\E//gi; } } } sub _decode_entities { my $self = shift; my ($fields) = @_; foreach my $field (grep !/^\_/, keys %$fields) { decode_entities($fields->{$field}); } } sub _field_rewrite { my $self = shift; my ($field, $handler) = @_; my %rewrite = map { $_ => true } @{$handler->{fields}}; return ($rewrite{$field} || $rewrite{'*'}); } sub _subst_data { my $self = shift; my ($html, $tag) = @_; return (map { [ $_ => $html->{$tag}->{attr}->{$_} ] } keys %{$html->{$tag}->{attr}}), (map { [ $_ => $html->{$tag}->{$_} ] } grep /^(?:text)$/, keys %{$html->{$tag}}); } sub _subst_pattern { my $self = shift; my ($html, $tag) = @_; if (scalar @{$html->{$tag}->{attrseq}}) { my $attr = join ' ', map "${_}=\"$html->{$tag}->{attr}->{$_}\"", @{$html->{$tag}->{attrseq}}; my $text = $html->{$tag}->{text}; return defined $text ? qr{<$tag\s+?\Q$attr\E>$text} : qr{<$tag\s+?\Q$attr\E>(.*?)}; } else { return qr{<$tag>(.*?)}; } } 1;