use Test::More tests => 12; use Data::Dumper; BEGIN { use_ok( 'XML::Descent' ); } my $xml = < http://hexten.net/ http://www.koders.com/ http://search.cpan.org/ http://perldoc.perl.org/ http://www.ruby-lang.org/ Frog fleening The body text is just HTML. http://cpan.hexten.net/ This text is ignored This has a handler which doesn't recursively parse the contents This is tokenised. EOX # Trailing newline causes problems chomp $xml; #### Test xml() returns original source my $p1 = XML::Descent->new( { Input => \$xml } ); my $o1 = $p1->xml(); is( $o1, $xml, 'unparsed XML' ); #### Test text() returns all text ( my $text = $xml ) =~ s/<[^>]+>//g; my $p2 = XML::Descent->new( { Input => \$xml } ); my $o2 = $p2->text(); is( $o2, $text, 'extracted text' ); #### Global extract tag contents my @furls = $xml =~ />(http:.+?)new( { Input => \$xml } ); my @gurls = (); $p3->on( url => sub { push @gurls, $p3->text(); } ); $p3->walk(); is_deeply( \@gurls, \@furls, 'extract urls' ); #### Get all elements my @felem = $xml =~ /<(\w+)/g; my $p4 = XML::Descent->new( { Input => \$xml } ); my @gelem = (); $p4->on( '*' => sub { my ( $elem, $attr ) = @_; push @gelem, $elem; $p4->walk(); } ); $p4->walk(); is_deeply( \@gelem, \@felem, 'all elements' ); #### Global extract attribute my @fnames = $xml =~ /name=\"(.*?)\"/g; my $p5 = XML::Descent->new( { Input => \$xml } ); my @gnames = (); $p5->on( '*' => sub { my ( $elem, $attr ) = @_; push @gnames, $attr->{name} if exists $attr->{name}; $p5->walk(); } ); $p5->walk(); is_deeply( \@gnames, \@fnames, 'extracted attributes' ); #### Extract inner XML my @fmeta = $xml =~ m{(.*?)}sm; my $gmeta = undef; my $p6 = XML::Descent->new( { Input => \$xml } ); $p6->on( meta => sub { $gmeta = $p6->xml(); } ); $p6->walk(); is( $gmeta, $fmeta[0], 'extract inner XML' ); #### Extract inner text ( my $ftext = $fmeta[0] ) =~ s/<.+?>//g; my $gtext = undef; my $p7 = XML::Descent->new( { Input => \$xml } ); $p7->on( meta => sub { $gtext = $p7->text(); } ); $p7->walk(); is( $gtext, $ftext, 'extract inner text' ); #### Test get_tok my $ftag = bless( [ 'E', 'url', '' ], 'XML::TokeParser::Token' ); my $p8 = XML::Descent->new( { Input => \$xml } ); my $gtag = undef; $p8->on( favourites => sub { TOK: while ( my $tok = $p8->get_token() ) { if ( $tok->[0] eq 'E' ) { $gtag = $tok; last TOK; } } } ); my $gmeta2 = 0; $p8->on( meta => sub { $gmeta2++; } ); $p8->walk(); is_deeply( $gtag, $ftag, 'get_token' ); is( $gmeta2, 1, 'found meta' ); #### Test paths my @path = (); my @fpath = (); while ( $xml =~ m{<(/?[a-z]+)}g ) { my $tag = $1; if ( $tag =~ m{^/} ) { pop @path; } else { push @path, $tag; push @fpath, '/' . join( '/', @path ); } } my @gpath = (); my $p9 = XML::Descent->new( { Input => \$xml } ); $p9->on( '*' => sub { push @gpath, $p9->get_path(); $p9->walk(); } ); $p9->walk(); is_deeply( \@gpath, \@fpath, 'get_path()' ); #### Test context my $p10 = XML::Descent->new( { Input => \$xml } ); my $root = {}; $p10->context( $root ); $p10->on( '*' => sub { my ( $elem, $attr, $ctx ) = @_; my $obj = { %{$attr} }; # Keep attributes $p10->context( $obj ); $p10->walk(); # Save results in caller's context push @{ $ctx->{$elem} }, $obj; } ); # Leaf elements -> save text $p10->on( [ 'url', 'title', 'ignored', 'handled' ], sub { my ( $elem, $attr, $ctx ) = @_; push @{ $ctx->{$elem} }, { %{$attr}, inner_text => $p10->text() }; } ); $p10->on( [ 'body', 'tokenised' ] => sub { my ( $elem, $attr, $ctx ) = @_; push @{ $ctx->{$elem} }, { %{$attr}, inner_text => $p10->xml() }; } ); $p10->walk(); $fstruc = { 'config' => [ { 'favourites' => [ { 'folder' => [ { 'url' => [ { 'name' => 'Hexten', 'inner_text' => 'http://hexten.net/' } ], 'name' => 'Me' }, { 'url' => [ { 'name' => 'Source code search', 'inner_text' => 'http://www.koders.com/' } ], 'name' => 'Programming', 'folder' => [ { 'url' => [ { 'name' => 'CPAN Search', 'inner_text' => 'http://search.cpan.org/' }, { 'name' => 'Perl Documentation', 'inner_text' => 'http://perldoc.perl.org/' } ], 'name' => 'Perl' }, { 'url' => [ { 'name' => 'Ruby Home', 'inner_text' => 'http://www.ruby-lang.org/' } ], 'name' => 'Ruby' } ] } ] } ], 'meta' => [ { 'body' => [ { 'inner_text' => 'The body text is just HTML.' } ], 'handled' => [ { 'inner_text' => 'This has a handler which doesn\'t recursively parse the contents' } ], 'url' => [ { 'inner_text' => 'http://cpan.hexten.net/' } ], 'title' => [ { 'inner_text' => 'Frog fleening' } ], 'tokenised' => [ { 'inner_text' => 'This is tokenised.' } ], 'ignored' => [ { 'inner_text' => 'This text is ignored' } ] } ] } ] }; is_deeply( $root, $fstruc, 'context' );