## -*- cperl -*-
package XML::DT;
BEGIN {
#XML::LIBXML# use XML::LibXML;
#XML::PARSER# use XML::Parser;
use Data::Dumper;
use LWP::Simple;
use XML::DTDParser "ParseDTDFile";
use Exporter ();
use vars qw($c %v $q @dtcontext %dtcontextcount @dtatributes @dtattributes );
# Para tirar, parece-me
#eval "use bytes";
#if (my $m = $INC{"bytes.pm"}) {require bytes; import bytes;}
@ISA=qw(Exporter);
@EXPORT=qw(&dt &dtstring &dturl &inctxt &ctxt &mkdtskel &mkdtskel_fromDTD &mkdtdskel &toxml
&MMAPON $c %v $q &xmltree &pathdturl
@dtcontext %dtcontextcount @dtatributes @dtattributes &pathdt &pathdtstring );
$VERSION = '0.37';
#XML::LIBXML# $PARSER = 'XML::LibXML';
#XML::PARSER# $PARSER = 'XML::Parser';
}
=head1 NAME
XML::DT - a package for down translation of XML files
=head1 SYNOPSIS
use XML::DT;
%xml=( 'music' => sub{"Music from: $c\n"},
'lyrics' => sub{"Lyrics from: $v{name}\n"},
'title' => sub{ uc($c) },
'-default' => sub{"$q:$c"} );
print dt($filename,%xml);
=head1 ABSTRACT
This module is a XML down processor. It maps tag (element)
names to functions to process that element and respective
contents.
=head1 DESCRIPTION
This module processes XML files with an approach similar to
OMNIMARK. As XML parser it uses XML::Parser or XML::LibXML module in
an independent way. At configure stage, you should choose one of the
back-ends.
If you use XML::LibXML module as backend, you can parse HTML files as
if they were XML files. For this, you must supply an extra option to
the hash:
%hander = ( -html => 1,
...
);
=head1 Functions
=head2 dt
Down translation function C
receives a filename and a set of
expressions (functions) defining the processing and associated values
for each element.
=head2 dtstring
C works in a similar way with C but takes input from a
string instead of a file.
=head2 dturl
C works in a similar way with C but takes input from an
Internet url instead of a file.
=head2 pathdt
The C function is a C function which can handle a subset
of XPath on handler keys. Example:
%handler = (
"article/title" => sub{ toxml("h1",{},$c) },
"section/title" => sub{ toxml("h2",{},$c) },
"title" => sub{ $c },
"//image[@type='jpg']" => sub{ "JPEG:
" },
"//image[@type='bmp']" => sub{ "BMP: sorry, no bitmaps on the web" },
)
pathdt($filename,%handler);
Here are some examples of valid XPath expressions under XML::DT:
/aaa
/aaa/bbb
//ccc - ccc somewhere (same as "ccc")
/*/aaa/*
//* - same as "-default"
/aaa[@id] - aaa with an attribute id
/*[@*] - root with an attribute
/aaa[not(@name)] - aaa with no attribute "name"
//bbb[@name='foo'] - ... attribute "name" = "foo"
/ccc[normalize-space(@name)='bbb']
//*[name()='bbb'] - complex way of saying "//bbb"
//*[starts-with(name(),'aa')] - an element named "aa.*"
//*[contains(name(),'c')] - an element ".*c.*"
//aaa[string-length(name())=4] "...."
//aaa[string-length(name())<4] ".{1,4}"
//aaa[string-length(name())>5] ".{5,}"
Note that not all XPath is currently handled by XML::DT. A lot of
XPath will never be added to XML::DT because is not in accordance with
the down translation model. For more documentation about XPath check
the specification at http://www.w3c.org or some tutorials under
http://www.zvon.org
=head2 pathdtstring
Like the C function but supporting XPath.
=head2 pathdturl
Like the C function but supporting XPath.
=head2 inctxt
C is true if the actual element path matches the
provided pattern. This function is meant to be used in the element
functions in order to achieve context dependent processing.
=head2 ctxt
Returns the context element of the currently being processed
element. So, if you call C you will get your father element,
and so on.
=head2 toxml
This is the default "-default" function. It can be used to generate
XML based on C<$c> C<$q> and C<%v> variables. Example: add a new
attribute to element C without changing it:
%handler=( ...
ele1 => sub { $v{at1} = "v1"; toxml(); },
)
C can also be used with 3 arguments: tag, attributes and contents
toxml("a",{href=> "http://local/f.html"}, "example")
returns:
example
=head2 xmltree
This simple function just makes a HASH reference:
{ -c => $c, -q => $q, all_the_other_attributes }
The function C understands this structure and makes XML with it.
=head2 mkdtskel
Used by the mkdtskel script to generate automatically a XML::DT perl
script file based on an XML file. Check C manpage for
details.
=head2 mkdtskel_fromDTD
Used by the mkdtskel script to generate automatically a XML::DT perl
script file based on an DTD file. Check C manpage for
details.
=head2 mkdtdskel
Used by the mkdtskel script to generate automatically a XML::DT perl
script file based on a DTD file. Check C manpage for
details.
=head1 Accessing parents
With XML::DT you can access an element parent (or grand-parent)
attributes, till the root of the XML document.
If you use c<$dtattributes[1]{foo} = 'bar'> on a processing function,
you are defining the attribute C for that element parent.
In the same way, you can use C<$dtattributes[2]> to access the
grand-parent. C<$dtattributes[-1]> is, as expected, the XML document
root element.
=head1 User provided element processing functions
The user must provide an HASH with a function for each element, that
computes element output. Functions can use the element name C<$q>, the
element content C<$c> and the attribute values hash C<%v>.
All those global variables are defined in C<$CALLER::>.
Each time an element is find the associated function is called.
Content is calculated by concatenation of element contents strings and
interior elements return values.
=head2 C<-default> function
When a element has no associated function, the function associated
with C<-default> called. If no C<-default> function is defined the
default function returns a XML like string for the element.
When you use C-type> definitions, you often need do set C<-default>
function to return just the contents: C.
=head2 C<-outputenc> option
C<-outputenc> defines the output encoding (default is Unicode UTF8).
=head2 C<-inputenc> option
C<-inputenc> forces a input encoding type. Whenever that is possible,
define the input encoding in the XML file:
=head2 C<-pcdata> function
C<-pcdata> function is used to define transformation over the
contents. Typically this function should look at context (see
C function)
The default C<-pcdata> function is the identity
=head2 C<-begin> function
Function to be executed before processing XML file.
Example of use: initialization of side-effect variables
=head2 C<-end> function
Function to be executed after processing XML file. I can use C<$c>
content value. The value returned by C<-end> will be the C return
value.
Example of use: post-processing of returned contents
=head1 Elements with values other than strings (C<-type>)
By default all elements return strings, and contents (C<$c>) is the
concatenation of the strings returned by the sub-elements.
In some situations the XML text contains values that are better
processed as a structured type.
The following types (functors) are available:
=over 4
=item THE_CHILD
Return the result of processing the only child of the element.
=item LAST_CHILD
Returns the result of processing the last child of the element.
=item STR
concatenates all the sub-elements returned values (DEFAULT) all the
sub-element should return strings to be concatenated;
=item SEQ
makes an ARRAY with all the sub elements contents; attributes are
ignored (they should be processed in the sub-element). (returns a ref)
If you have different types of sub-elements, you should use SEQH
=item SEQH
makes an ARRAY of HASH with all the sub elements (returns a ref); for
each sub-element:
-q => element name
-c => contents
at1 => at value1 for each attribute
=item MAP
makes an HASH with the sub elements; keys are the sub-element names,
values are their contents. Attributes are ignored. (they should be
processed in the sub-element) (returns a ref)
=item MULTIMAP
makes an HASH of ARRAY; keys are the sub-element names; values are
lists of contents; attributes are ignored (they should be processed in
the sub-element); (returns a ref)
=item MMAPON(element-list)
makes an HASH with the sub-elements; keys are the sub-element names,
values are their contents; attributes are ignored (they should be
processed in the sub-element); for all the elements contained in the
element-list, it is created an ARRAY with their contents. (returns a
ref)
=item XML
return a reference to an HASH with:
-q => element name
-c => contents
at1 => at value1 for each attribute
=item ZERO
don't process the sub-elements; return ""
=back
When you use C-type> definitions, you often need do set C<-default>
function returning just the contents C.
=head2 An example:
use XML::DT;
%handler = ( contacts => sub{ [ split(";",$c)] },
-default => sub{$c},
-type => { institution => 'MAP',
degrees => MMAPON('name')
tels => 'SEQ' }
);
$a = dt ("f.xml", %handler);
with the following f.xml
U.M.
University of Minho
- 1111
- 1112
- 1113
Portugal
J.Joao; J.Rocha; J.Ramalho
Computer science
Informatica
history
would make $a
{ 'name' => [ 'Computer science',
'Informatica ',
' history ' ],
'institution' => { 'tels' => [ 1111, 1112, 1113 ],
'name' => 'University of Minho',
'where' => 'Portugal',
'id' => 'U.M.',
'contacts' => [ 'J.Joao',
' J.Rocha',
' J.Ramalho' ] } };
=head1 DT Skeleton generation
It is possible to build an initial processor program based on an example
To do this use the function C.
Example:
perl -MXML::DT -e 'mkdtskel "f.xml"' > f.pl
=head1 DTD skeleton generation
It makes a naive DTD based on an example(s).
To do this use the function C.
Example:
perl -MXML::DT -e 'mkdtdskel "f.xml"' > f.dtd
=head1 SEE ALSO
mkdtskel(1) and mkdtdskel(1)
=head1 AUTHORS
Home for XML::DT;
http://natura.di.uminho.pt/~jj/perl/XML/
Jose Joao Almeida,
Alberto Manuel Simões,
thanks to
Michel Rodriguez
José Carlos Ramalho
Mark A. Hillebrand
=head1 COPYRIGHT AND LICENSE
Copyright 1999-2004 by Projecto Natura
This library is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.
=cut
%ty = ();
sub dt {
my ($file,%xml)=@_;
# Treat -decl option
my $declr = "";
if ($xml{-declr}) {
if ($xml{-outputenc}) {
$declr = "\n";
} else {
$declr = "\n";
}
}
%ty = ();
%ty = (%{$xml{'-type'}}) if defined($xml{'-type'});
$ty{-ROOT} = "NONE";
&{$xml{-begin}} if $xml{-begin};
#XML::LIBXML## TODO --- how to force encoding with XML::LibXML?
#XML::LIBXML## $xml{-inputenc}
#XML::LIBXML## create a new LibXML parser
#XML::LIBXML# my $parser = XML::LibXML->new();
#XML::LIBXML# $parser->validation(0);
#XML::LIBXML# $parser->load_ext_dtd(0);
#XML::LIBXML## Check if we should expand entities
#XML::LIBXML# $parser->expand_entities(1) if defined $xml{'-noexpand'} && $xml{'-noexpand'};
#XML::LIBXML## parse the file
#XML::LIBXML# my $doc;
#XML::LIBXML# if ( $xml{'-html'}) {
#XML::LIBXML# $parser->recover(1);
#XML::LIBXML# eval{$doc = $parser->parse_html_file($file);};
#XML::LIBXML## if ($@) {warn("Erro: $@\n"); } #{return undef; }
#XML::LIBXML# return undef if !$doc;
#XML::LIBXML# }
#XML::LIBXML# else{ $doc = $parser->parse_file($file) }
#XML::LIBXML## get the document root element
#XML::LIBXML# my $tree = $doc->getDocumentElement();
#XML::PARSER## create a new XML::Parser instance using Tree Style
#XML::PARSER# if (defined($xml{-inputenc}) && ($xml{-inputenc} eq 'ISO-8859-1')){
#XML::PARSER# $parser = new XML::Parser(Style => 'Tree',
#XML::PARSER# ErrorContext => 2 ,
#XML::PARSER# ProtocolEncoding => 'ISO-8859-1');
#XML::PARSER# }
#XML::PARSER# else { $parser = new XML::Parser(Style => 'Tree',
#XML::PARSER# ErrorContext => 2 ,
#XML::PARSER# );
#XML::PARSER# }
#XML::PARSER## Convert XML to Perl code
#XML::PARSER# $tree = $parser->parsefile($file);
my $return = "";
# execute End action if it exists
if($xml{-end}) {
#XML::LIBXML# $c = _omni("-ROOT", \%xml, $tree);
#XML::PARSER# $c = _omni("-ROOT", \%xml, @$tree);
$return = &{$xml{-end}}
} else {
#XML::LIBXML# $return = _omni("-ROOT",\%xml, $tree)
#XML::PARSER# $return = _omni("-ROOT",\%xml, @$tree)
}
if ($declr) {
return $declr.$return;
} else {
return $return;
}
}
sub ctxt {
my $level = $_[0];
$dtcontext[-$level-1];
}
sub inctxt {
my $pattern = shift ;
# see if is in root context...
return 1 if (($pattern eq "^" && @dtcontext==1) || $pattern eq ".*");
join("/",@dtcontext) =~ m!$pattern/[^/]*$! ;
}
sub pathdtstring{
my $string = shift;
my %h = _pathtodt(@_);
return dtstring($string,%h);
}
sub pathdturl{
my $url = shift;
my %h = _pathtodt(@_);
return dturl($url,%h);
}
sub dturl{
my $url = shift;
my $contents = get($url);
if ($contents) {
return dtstring($contents, @_);
} else {
return undef;
}
}
sub dtstring {
my ($string,%xml)=@_;
my $declr = "";
if ($xml{-declr}) {
if ($xml{-outputenc}) {
$declr = "\n";
} else {
$declr = "\n";
}
}
$xml{'-type'} = {} unless defined $xml{'-type'};
%ty = (%{$xml{'-type'}}, -ROOT => "NONE");
# execute Begin action if it exists
if ($xml{-begin}){ &{$xml{-begin}} }
#XML::LIBXML# $string = XML::LibXML::encodeToUTF8($xml{-inputenc},$string) if ($xml{-inputenc});
#XML::LIBXML## create a new LibXML parser
#XML::LIBXML# my $parser = XML::LibXML->new();
#XML::LIBXML## Check if we should expand entities
#XML::LIBXML# $parser->expand_entities(0) if defined $xml{'-noexpand'} && $xml{'-noexpand'};
#XML::LIBXML## parse the string
#XML::LIBXML# my $doc;
#XML::LIBXML# $doc = $parser->parse_string($string) unless( $xml{'-html'});
#XML::LIBXML# if ( $xml{'-html'}) {
#XML::LIBXML# $parser->recover(1);
#XML::LIBXML# eval{$doc = $parser->parse_html_string($string);};
#XML::LIBXML## if ($@) { return undef; }
#XML::LIBXML# return undef unless defined $doc;
#XML::LIBXML# }
#XML::LIBXML## get the document root element
#XML::LIBXML# my $tree = $doc->getDocumentElement();
#XML::PARSER## create a new XML::Parser instance using Tree Style
#XML::PARSER# if (defined($xml{-inputenc}) && ($xml{-inputenc} eq 'ISO-8859-1')){
#XML::PARSER# $parser = new XML::Parser(Style => 'Tree',
#XML::PARSER# ErrorContext => 2 ,
#XML::PARSER# ProtocolEncoding => 'ISO-8859-1');
#XML::PARSER# }
#XML::PARSER# else { $parser = new XML::Parser(Style => 'Tree',
#XML::PARSER# ErrorContext => 2 ,
#XML::PARSER# );
#XML::PARSER# }
#XML::PARSER## Convert XML to Perl code (Tree)
#XML::PARSER# $tree = $parser->parse($string);
my $return;
# Check if we have an end function
if ($xml{-end}) {
#XML::LIBXML# $c = _omni("-ROOT", \%xml, $tree);
#XML::PARSER# $c = _omni("-ROOT", \%xml, @$tree);
$return = &{$xml{-end}}
} else {
#XML::LIBXML# $return = _omni("-ROOT", \%xml, $tree)
#XML::PARSER# $return = _omni("-ROOT", \%xml, @$tree)
}
if ($declr) {
return $declr.$return;
} else {
return $return;
}
}
sub pathdt{
my $file = shift;
my %h = _pathtodt(@_);
return dt($file,%h);
}
# Parsing dos predicados do XPath
sub _testAttr {
my $atr = shift;
for ($atr) {
s/name\(\)/'$q'/g;
# s/\@([A-Za-z_]+)/'$v{$1}'/g;
s/\@([A-Za-z_]+)/defined $v{$1}?"'$v{$1}'":"''"/ge;
s/\@\*/keys %v?"'1'":"''"/ge;
if (/^not\((.*)\)$/) {
return ! _testAttr($1);
} elsif (/^('|")([^\1]*)(\1)\s*=\s*('|")([^\4]*)\4$/) {
return ($2 eq $5);
} elsif (/normalize-space\((['"])([^\1)]*)\1\)/) {
my ($back,$forward)=($`,$');
my $x = _normalize_space($2);
return _testAttr("$back'$x'$forward");
} elsif (/starts-with\((['"])([^\1))]*)\1,(['"])([^\3))]*)\3\)/) {
my $x = _starts_with($2,$4);
return $x;
} elsif (/contains\((['"])([^\1))]*)\1,(['"])([^\3))]*)\3\)/) {
my $x = _contains($2,$4);
return $x;
} elsif (/string-length\((['"])([^\1]*)\1\)/) {
my ($back,$forward) = ($`,$');
my $x = length($2);
return _testAttr("$back$x$forward");
} elsif (/^(\d+)\s*=(\d+)$/) {
return ($1 == $2);
} elsif (/^(\d+)\s*<(\d+)$/) {
return ($1 < $2);
} elsif (/^(\d+)\s*>(\d+)$/) {
return ($1 > $2);
} elsif (/^(['"])([^\1]*)\1$/) {
return $2;
}
}
return 0; #$atr;
}
# Funcao auxiliar de teste de predicados do XPath
sub _starts_with {
my ($string,$preffix) = @_;
return 0 unless ($string && $preffix);
return 1 if ($string =~ m!^$preffix!);
return 0;
}
# Funcao auxiliar de teste de predicados do XPath
sub _contains {
my ($string,$s) = @_;
return 0 unless ($string && $s);
return 1 if ($string =~ m!$s!);
return 0;
}
# Funcao auxiliar de teste de predicados do XPath
sub _normalize_space {
my $z = shift;
$z =~ /^\s*(.*?)\s*$/;
$z = $1;
$z =~ s!\s+! !g;
return $z;
}
sub _pathtodt {
my %h = @_;
my %aux=();
my %aux2=();
my %n = ();
my $z;
for $z (keys %h) {
# TODO - Make it more generic
if ( $z=~m{\w+(\|\w+)+}) {
my @tags = split /\|/, $z;
for(@tags) {
$aux2{$_}=$h{$z}
}
}
elsif ( $z=~m{(//|/|)(.*)/([^\[]*)(?:\[(.*)\])?} ) {
my ($first,$second,$third,$fourth) = ($1,$2,$3,$4);
if (($first eq "/") && (!$second)) {
$first = "";
$second = '.*';
$third =~ s!\*!-default!;
} else {
$second =~ s!\*!\[^/\]\+!g;
$second =~ s!/$!\(/\.\*\)\?!g;
$second =~ s!//!\(/\.\*\)\?/!g;
$third =~ s!\*!-default!g;
}
push( @{$aux{$third}} , [$first,$second,$h{$z},$fourth]);
}
else { $aux2{$z}=$h{$z};}
}
for $z (keys %aux){
my $code = sub {
my $l;
for $l (@{$aux{$z}}) {
my $prefix = "";
$prefix = "^" unless (($l->[0]) or ($l->[1]));
$prefix = "^" if (($l->[0] eq "/") && ($l->[1]));
if ($l->[3]) {
if(inctxt("$prefix$l->[1]") && _testAttr($l->[3]))
{return &{$l->[2]}; }
} else {
if(inctxt("$prefix$l->[1]")) {return &{$l->[2]};}
}
}
return &{ $aux2{$z}} if $aux2{$z} ;
return &{ $h{-default}} if $h{-default};
&toxml();
};
$n{$z} = $code;
}
for $z (keys %aux2){
$n{$z} ||= $aux2{$z} ;
}
return %n;
}
sub _omni{
my ($par, $xml, @l) = @_;
my $type = $ty{$par} || "STR";
my %typeargs = ();
if (ref($type) eq "mmapon") {
for (@$type)
{ $typeargs{$_} = 1 }
$type = "MMAPON";
};
my $r ;
if( $type eq 'STR') { $r = "" }
elsif( $type eq 'THE_CHILD' or $type eq 'LAST_CHILD') { $r = 0 }
elsif( $type eq 'SEQ' or $type eq "ARRAY") { $r = [] }
elsif( $type eq 'SEQH' or $type eq "ARRAYOFHASH") { $r = [] }
elsif( $type eq 'MAP' or $type eq "HASH") { $r = {} }
elsif( $type eq 'MULTIMAP') { $r = {} }
elsif( $type eq 'MMAPON' or $type eq "HASHOFARRAY") { $r = {} }
elsif( $type eq 'NONE') { $r = "" }
elsif( $type eq 'ZERO') { return "" }
my ($name, $val, @val, $atr, $aux);
while(@l) {
#XML::LIBXML# my $tree = shift @l;
#XML::LIBXML# if (ref($tree) eq "XML::LibXML::CDATASection") {
#XML::LIBXML# $name = "-pcdata";
#XML::LIBXML## $val = $tree->getData();
#XML::LIBXML## print STDERR Dumper($val);
#XML::LIBXML## $aux= (defined($xml->{-outputenc}))?_fromUTF8($val,$xml->{-outputenc}):$val;
#XML::LIBXML## if (defined($xml->{-pcdata})) {
#XML::LIBXML## push(@dtcontext,"-pcdata");
#XML::LIBXML### $c = $aux;
#XML::LIBXML#### $aux = &{$xml->{-pcdata}};
#XML::LIBXML## pop(@dtcontext);
#XML::LIBXML## }
#XML::LIBXML# } else {
#XML::LIBXML# $name = $tree->getName();
#XML::LIBXML# }
#XML::PARSER# ($name,$val,@l) = @l;
#XML::LIBXML# if (ref($tree) eq "XML::LibXML::Comment") {
#XML::LIBXML# ### At the moment, treat as Text
#XML::LIBXML# ### We will need to change this, I hope!
#XML::LIBXML# $val = "";
#XML::LIBXML# $name = "-pcdata";
#XML::LIBXML# $aux= (defined($xml->{-outputenc}))?_fromUTF8($val, $xml->{-outputenc}):$val;
#XML::LIBXML# if (defined($xml->{-pcdata})) {
#XML::LIBXML# push(@dtcontext,"-pcdata");
#XML::LIBXML# $c = $aux;
#XML::LIBXML# $aux = &{$xml->{-pcdata}};
#XML::LIBXML# pop(@dtcontext);
#XML::LIBXML# }
#XML::LIBXML# } elsif (ref($tree) eq "XML::LibXML::Text" || ref($tree) eq "XML::LibXML::CDATASection")
#XML::PARSER# if ($name eq "0")
{
#XML::LIBXML# $val = $tree->getData();
$name = "-pcdata";
$aux = (defined($xml->{-outputenc}))?_fromUTF8($val,$xml->{-outputenc}):$val;
if (defined($xml->{-pcdata})) {
push(@dtcontext,"-pcdata");
$c = $aux;
$aux = &{$xml->{-pcdata}};
pop(@dtcontext);
}
} else {
#XML::LIBXML# my %atr = _nodeAttributes($tree);
#XML::LIBXML# $atr = \%atr;
#XML::PARSER# ($atr,@val) = @$val;
push(@dtcontext,$name);
$dtcontextcount{$name}++;
unshift(@dtatributes, $atr);
unshift(@dtattributes, $atr);
#XML::LIBXML# $aux = _omniele($xml, $name, _omni($name, $xml, ($tree->getChildnodes())), $atr);
#XML::PARSER# $aux = _omniele($xml, $name, _omni($name, $xml, @val), $atr);
shift(@dtatributes);
shift(@dtattributes);
pop(@dtcontext); $dtcontextcount{$name}--;
}
if ($type eq "STR"){ if (defined($aux)) {$r .= $aux} ;}
elsif ($type eq "THE_CHILD" or $type eq "LAST_CHILD"){
$r = $aux unless _whitepc($aux, $name); }
elsif ($type eq "SEQ" or $type eq "ARRAY"){
push(@$r, $aux) unless _whitepc($aux, $name);}
elsif ($type eq "SEQH" or $type eq "ARRAYHASH"){
push(@$r,{"-c" => $aux,
"-q" => $name,
#XML::LIBXML# _nodeAttributes($tree)
#XML::PARSER# %$atr
}) unless _whitepc($aux,$name);
}
elsif($type eq "MMAPON"){
if(not _whitepc($aux,$name)){
if(! $typeargs{$name}) {
warn "duplicated tag ´$name´\n" if(defined($r->{$name}));
$r->{$name} = $aux }
else { push(@{$r->{$name}},$aux) unless _whitepc($aux,$name)}}
}
elsif($type eq "MAP" or $type eq "HASH"){
if(not _whitepc($aux,$name)){
warn "duplicated tag ´$name´\n" if(defined($r->{$name}));
$r->{$name} = $aux }}
elsif($type eq "MULTIMAP"){
push(@{$r->{$name}},$aux) unless _whitepc($aux,$name)}
elsif($type eq "NONE"){ $r = $aux;}
else { $r="undefined type !!!"}
}
$r;
}
sub _omniele {
my $xml = shift;
my $aux;
($q, $c, $aux) = @_;
%v = %$aux;
if (defined($xml->{-outputenc})) {
for (keys %v){
$v{$_} = _fromUTF8($v{$_}, $xml->{-outputenc})
}
}
if (defined $xml->{$q})
{ &{$xml->{$q}} }
elsif (defined $xml->{'-default'})
{ &{$xml->{'-default'}} }
else
{ toxml() }
}
sub xmltree {
+{'-c' => $c, '-q' => $q, %v}
}
# está a ser usada pela toxml1, que não é usada :-)
# sub toxmlp {
# my ($q,$v,$c ) = @_;
# if ($q eq "-pcdata") { $c }
# else {"<$q" . join("",map {" $_=\"$v->{$_}\""} keys %$v ) . ">$c$q>" }
# }
sub toxml {
my ($q,$v,$c);
if (not @_) {
($q,$v,$c) = ($XML::DT::q, \%XML::DT::v, $XML::DT::c);
} elsif (ref($_[0])) {
$c = shift;
} else {
($q,$v,$c) = @_;
}
if (not ref($c)) {
if ($q eq "-pcdata") {
return $c
}
return _openTag($q,$v) . "$c$q>"
}
elsif (ref($c) eq "HASH" && $c->{'-q'} && $c->{'-c'}) {
my %a = %$c;
my ($q,$c) = delete @a{"-q","-c"};
toxml($q,\%a,$c);
}
elsif (ref($c) eq "HASH") {
_openTag($q,$v).
join("",map {($_ ne "-pcdata")
? ( (ref($c->{$_}) eq "ARRAY")
? "<$_>".
join("$_>\n<$_>", @{$c->{$_}}).
"$_>\n"
: toxml($_,{},$c->{$_})."\n" )
: () }
keys %{$c} ) .
"$c->{-pcdata}$q>" } ######## "NOTYetREady"
elsif (ref($c) eq "ARRAY") {
if($ty{$q} eq "SEQH"){toxml($q,$v,join("\n",map {toxml($_)} @$c))}
else {toxml($q,\%v, join("",@{$c}))}
}
}
sub _openTag{
"<$_[0]". join("",map {" $_=\"$_[1]{$_}\""} keys %{$_[1]} ).">"
}
# toxml1 is not being used.
# sub toxml1 {
# if(@_ == 3){return toxmlp(@_)}
# return "" if (defined $ty{$q} && $ty{$q} eq "ZERO");
# if(not ref($c)){ toxmlp($q,\%v,$c)}
# elsif (ref($c) eq "ARRAY") {
# if($ty{$q} eq "SEQH") {
# toxmlp($q,{},
# join("",map {my %a=%$_;
# delete @a{"-q","-c"};
# toxmlp($_->{'-q'},\%a,$_->{-c}) } @{$c} ))
# }
# else { toxmlp($q,\%v, join("",@{$c}))}
# }
# elsif (ref($c) eq "HASH") {
# "<$q".
# join("",map {" $_=\"$v{$_}\""} keys %v ) . ">" .
# join("",map {($_ ne "-pcdata")
# ? ( (ref($c->{$_}) eq "ARRAY")
# ? "<$_>".
# join("$_>\n<$_>", @{$c->{$_}}).
# "$_>\n"
# : "<$_>$c->{$_}$_>\n" )
# : () }
# keys %{$c} ) .
# "$c->{-pcdata}$q>" }
# }
sub mkdtskel_fromDTD {
my $filename = shift;
my $file = ParseDTDFile($filename);
print <<'PERL';
#!/usr/bin/perl
use XML::DT ;
my $filename = shift;
# Variable Reference
#
# $c - contents after child processing
# $q - element name (tag)
# %v - hash of attributes
%handler=(
# '-outputenc' => 'ISO-8859-1',
# '-default' => sub{"<$q>$c$q>"},
PERL
for (sort keys %{$file}) {
print " '$_' => sub { },";
print " # attributes: ",
join(", ", keys %{$file->{$_}{attributes}}) if exists($file->{$_}{attributes});
print "\n";
}
print <<'PERL';
);
print dt($filename,%handler);
PERL
}
sub mkdtskel{
my @files = @_;
my %mkdtskel =
('-default' => sub{
$element{$q}++;
for (keys %v) {
$att{$q}{$_} = 1
};
""},
'-end' => sub{
print <<'END';
#!/usr/bin/perl
use XML::DT ;
my $filename = shift;
# Variable Reference
#
# $c - contents after child processing
# $q - element name (tag)
# %v - hash of attributes
%handler=(
# '-outputenc' => 'ISO-8859-1',
# '-default' => sub{"<$q>$c$q>"},
END
for $name (sort keys %element) {
print " '$name' => sub{ }, #";
print " $element{$name} occurrences;";
print ' attributes: ',
join(', ', keys %{$att{$name}}) if $att{$name};
# print " \"\$q:\$c\"\n";
print "\n";
}
print <<'END';
);
print dt($filename,%handler);
END
}
);
$file = shift(@files);
while($file =~ /^-/){
if ($file eq "-html") { $mkdtskel{'-html'} = 1;}
elsif($file eq "-latin1") { $mkdtskel{'-inputenc'}='ISO-8859-1';}
else { die("usage mktskel [-html] [-latin1] file \n")}
$file=shift(@files)}
dt($file,%mkdtskel)
}
sub _nodeAttributes {
my $node = shift;
my %answer = ();
my @attrs = $node->getAttributes();
for (@attrs) {
if (ref($_) eq "XML::LibXML::Namespace") {
# TODO: This should not be ignored, I think.
# This sould be converted on a standard attribute with
# key 'namespace' and respective contents
} else {
$answer{$_->getName()} = $_->getValue();
}
}
return %answer;
}
sub mkdtdskel {
my @files = @_;
my %handler=(
'-outputenc' => 'ISO-8859-1',
'-default' => sub{
$elel{$q}++;
$root = $q unless ctxt(1);
$ele{ctxt(1)}{$q} ++;
for(keys(%v)){$att{$q}{$_} ++ } ;
},
'-pcdata' => sub{ if ($c =~ /[^ \t\n]/){ $ele{ctxt(1)}{"#PCDATA"}=1 }},
);
while($files[0] =~ /^-/){
if ($files[0] eq "-html") { $handler{'-html'} = 1;}
elsif($files[0] eq "-latin1") { $handler{'-inputenc'}='ISO-8859-1';}
else { die("usage mkdtdskel [-html] [-latin1] file* \n")}
shift(@files)}
for $filename (@files){
dt($filename,%handler);
}
print "\n\n";
delete $elel{$root};
for ($root, keys %elel){
_putele($_);
for $name (keys(%{$att{$_}})) {
print( "\t\n");
print( "\t\n");
}
}
}
# This is NOT being used.
#
# sub _process_libxml {
# my $node = shift;
# my $ref = ref($node);
# if ($ref eq "XML::LibXML::Text") {
# return [0, $node->toString() ];
# } else {
# my $name = $node->getName();
# my %attr = ( map { ($_->getName(), $_->getValue()) } $node->getAttributes() );
# my $x = [ $name, [ { %attr }, map {my $w = _process_libxml($_); @$w } $node->getChildnodes() ]];
# return $x;
# }
# }
sub _putele {
my $e = shift;
my @f ;
if ($ele{$e}) {
@f = keys %{$ele{$e}};
print "= 1 && $f[0] eq "#PCDATA" ? "" : "*"),
" >\n";
print "\n";
}
else {
print "\n";
}
}
sub _whitepc {
$_[1] eq '-pcdata' and $_[0] =~ /^[ \t\n]*$/
}
sub MMAPON {
bless([@_],"mmapon")
}
# not being used...
#
# sub SEQOF {
# bless([@_],"seqof")
# }
#######################################
# #
# Auxiliary functions for encodings #
# #
### STARTLATIN1 #######################
sub _fromUTF8 {
my $string = shift;
my $encode = shift;
my $ans = eval { XML::LibXML::decodeFromUTF8($encode, $string) };
if ($@) {
return $string
} else {
return $ans
}
}
### ENDLATIN1
1;