package HTML::TreeBuilder::Select; use warnings; use strict; =head1 NAME HTML::TreeBuilder::Select - Traverse a HTML tree using CSS selectors =head1 VERSION Version 0.111 =cut our $VERSION = '0.111'; use HTML::TreeBuilder::XPath; use Class::Accessor; use base qw(HTML::TreeBuilder::XPath); sub __container { my $self = shift; return $self->{__container} unless @_; return $self->{__container} = shift } sub __fake_container { my $self = shift; return $self->{__fake_container} unless @_; return $self->{__fake_container} = shift } =head1 SYNOPSIS my $tree = new HTML::TreeBuilder::Select my @entries = $tree->select("div.main div.entry"); =over 4 =item @elements = $tree->select(QUERY) Search the tree for elements matching the C, which should be a CSS selector. =item $tree->dump_HTML() Returns a string representation of the tree in (possibly invalid) HTML format. This method will preserve any text outside of the root-level elements and NOT automatically wrap the content in ... . =cut sub dump_HTML { my $self = shift; return unless my $container = $self->container; my @content; @content = $self->__fake_container ? $container->content_list : ($container); return join '', map { if (ref $_) { $_ = $_->as_HTML } $_ } @content; } =item my $element = $tree->container() A convenience method that will return either the containing element of the tree, or a simple div container containing the root-level elements. This is very similar to the C method, but C will also remember whether the tree had a containing root element or not. =cut sub container { my $self = shift; my $container = $self->__container; return $container if $container; my @content = $self->guts; if (1 == @content && ref $content[0]) { $container = $content[0]; } else { $self->__fake_container(1); $container = scalar $self->guts; } return unless $container; $self->__container($container); return $container; } =item $tree->delete() Same as L =cut sub delete { my $self = shift; $self->__fake_container(undef); $self->__container(undef); return $self->SUPER::delete; } =back =cut package HTML::Element; use HTML::TreeBuilder::XPath; use HTML::Selector::XPath qw(selector_to_xpath); use Carp; use constant _KEEP => "keep"; use constant _REPLACE => "replace"; use constant _DELETE => "delete"; sub select { my $self = shift; my $query = shift or croak "Need a query (a CSS selector or XPath)"; my $operation = shift; my $path; if ($query =~ s/^~//) { $path = $query; } elsif (! ref $query) { $path = selector_to_xpath($query); } my @elements = $self->findnodes($path); return wantarray ? @elements : $elements[0] unless $operation; if (ref $operation eq "CODE") { for my $element (@elements) { my @result = $operation->($element); my $directive = shift @result; $directive &&= lc $directive; if (! $directive || $directive eq _KEEP) { } elsif ($directive eq _REPLACE) { my $replacement = shift @result; if (ref $replacement eq "ARRAY") { $replacement = HTML::Element->new_from_lol($replacement); } $element->replace_with($replacement)->delete; } elsif ($directive eq _DELETE) { $element->delete; } } } elsif ($operation =~ m/^#$/i) { return scalar @elements; } else { croak "Operation ($operation) not permitted"; } } =head1 AUTHOR Robert Krimen, C<< >> =head1 BUGS Please report any bugs or feature requests to C, or through the web interface at L. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. =head1 SUPPORT You can find documentation for this module with the perldoc command. perldoc HTML::TreeBuilder::Select You can also look for information at: =over 4 =item * AnnoCPAN: Annotated CPAN documentation L =item * CPAN Ratings L =item * RT: CPAN's request tracker L =item * Search CPAN L =back =head1 ACKNOWLEDGEMENTS =head1 COPYRIGHT & LICENSE Copyright 2007 Robert Krimen, all rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; # End of HTML::TreeBuilder::Select