#!/usr/bin/perl -- # -*- Perl -*- # $Id: dtdflatten,v 2.2 2005/07/16 03:22:57 ehood Exp $ # Author(s): Norman Walsh, # Earl Hood, # POD at end of file. use strict; use Getopt::Long; use SGML::DTDParse; use SGML::DTDParse::DTD; my %option = ('debug' => 0, 'verbose' => 1, 'output' => '-', 'declaration' => ''); my %opt = (); &GetOptions( \%opt, 'debug+', 'verbose!', 'output=s', 'catalog=s@', 'preserve=s@', 'declaration=s', @SGML::DTDParse::CommonOptions ) || SGML::DTDParse::usage(-verbose => 0, -exitval => 1); SGML::DTDParse::process_common_options(\%opt); foreach my $key (keys %option) { $option{$key} = $opt{$key} if exists($opt{$key}); } my @catalogs = exists($opt{'catalog'}) ? @{$opt{'catalog'}} : (); my @preserve = exists($opt{'preserve'}) ? @{$opt{'preserve'}} : (); my $file = shift @ARGV; my $output = $option{'output'} || ''; my $dtd = new SGML::DTDParse::DTD ( 'Verbose' => $option{'verbose'}, 'Debug' => $option{'debug'}, 'SgmlCatalogFilesEnv' => $option{'use-sgml-catalog-files'}, 'SourceDtd' => $file, 'Declaration' => $option{'declaration'}); foreach my $catalog (@catalogs) { $dtd->parseCatalog($catalog); } $dtd->parse($file); my $out_fh = \*STDOUT; if ($output ne '') { use Symbol; $out_fh = gensym; open ($out_fh, ">$output") || die qq{Error: Unable to create "$output": $!\n}; $dtd->status("Writing $output...", 1); } my $declcount = $dtd->declaration_count(); my @decls = $dtd->declarations(); my %peindex = (); for (my $count = 0; $count < $declcount; $count++) { $peindex{$decls[$count]->name()} = $count if $decls[$count]->type() eq 'param'; } $dtd->status("$declcount declarations.", 1); $dtd->status("Calculating used entities...", 1); my %usedPE = (); foreach my $decl (@decls) { if ($decl->type() eq 'element') { my $cm = $decl->content_model(); while ($cm =~ /^(.*?)%(\S+?);/s) { my $pe = $2; $usedPE{$pe} = 0 if !exists($usedPE{$pe}); $usedPE{$pe}++; $cm = $'; } } elsif ($decl->type() eq 'attlist') { my $text = $decl->text(); while ($text =~ /^(.*?)%(\S+?);/s) { my $pe = $2; $usedPE{$pe} = 0 if !exists($usedPE{$pe}); $usedPE{$pe}++; $text = $'; } } } # Now we know which elements use them, let's recurse... my %checkedPE = (); my $changed = 1; while ($changed) { $changed = 0; foreach my $decl (@decls) { if ($decl->type() eq 'param') { my $name = $decl->name(); my $text = $decl->text(); if ($usedPE{$name} && !$checkedPE{$name}) { $checkedPE{$name} = 1; $changed = 1; while ($text =~ /^(.*?)%(\S+?);/s) { my $pe = $2; $usedPE{$pe} = 0 if !exists($usedPE{$pe}); $usedPE{$pe}++; $text = $'; } } } } } # now output the flattened DTD print $out_fh <<'EOT'; EOT print $out_fh "\n\n"; foreach my $decl (@decls) { if ($decl->type() eq 'element') { my $name = $decl->name(); my $cm = $decl->content_model(); $cm = &expandPE($cm); print $out_fh "\n"; } elsif ($decl->type() eq 'attlist') { my $name = $decl->name(); my $text = $decl->text(); $text = &expandPE($text); print $out_fh "\n"; } elsif ($decl->type() eq 'param') { my $name = $decl->name(); my $keep = 0; if ($usedPE{$name}) { foreach my $re (@preserve) { $keep = 1 if $name =~ /$re/; last if $keep; } if ($keep) { my $text = $decl->text(); $text = &expandPE($text); my $quote = '"'; if ($text =~ /\"/s) { $quote = "'"; $text =~ s/\'/\'/sg; } print $out_fh "\n"; } } } elsif ($decl->type() eq 'gen') { my $name = $decl->name(); my $public = $decl->public(); my $system = $decl->system(); if ($public || $system) { print $out_fh "\n"; } else { my $text = $decl->text(); $text = &expandPE($text); my $quote = '"'; if ($text =~ /\"/s) { $quote = "'"; $text =~ s/\'/\'/sg; } print $out_fh "\n"; } } elsif ($decl->type() eq 'sdata' || $decl->type() eq 'pi') { my $name = $decl->name(); print $out_fh "type()) . " "; my $text = $decl->text(); $text = &expandPE($text); my $quote = '"'; if ($text =~ /\"/s) { $quote = "'"; $text =~ s/\'/\'/sg; } print $out_fh "$quote$text$quote>\n"; } elsif ($decl->type() eq 'ndata' || $decl->type() eq 'cdata') { my $name = $decl->name(); my $public = $decl->public(); my $system = $decl->system(); print $out_fh "type()), " ", $decl->notation(); print $out_fh ">\n"; } elsif ($decl->type() eq 'notation') { my $name = $decl->name(); my $public = $decl->public(); my $system = $decl->system(); print $out_fh "\n"; } else { die "Error: Unexpected declaration type: " . $decl->type() . "\n"; } } close($out_fh) if $output; $dtd->status("Done.", 1); exit 0; # ================================================================= sub expandPE { my $text = shift; my $expanded = ""; while ($text =~ /%(\S+?);/s) { $expanded .= $`; my $post = $'; my $pe = $1; my $keep = 0; foreach my $re (@preserve) { $keep = 1 if $pe =~ /$re/; last if $keep; } if ($keep) { $expanded .= "%$pe;"; $text = $post; } else { my $index = $peindex{$pe}; die "Error: Unexpected PE: $pe\n" if !defined($index); $text = $decls[$index]->text() . $post; } } return $expanded . $text; } __END__ =head1 NAME dtdflatten - Flatten an SGML/XML DTD. =head1 SYNOPSIS dtdflatten [options] =head1 DESCRIPTION B parses a DTD and prints out a flatten/expanded version of it with all parameter entities expanded. The first non-option-related argument provided on the command-line specifies the file to parse. If no filename is given, then the DTD is read from standard input. The flatten DTD is printed to standard output unless the C<--output> option is specified. =head1 OPTIONS =over 4 =item --catalog Specify catalog files to parse for resolving external entity references. This option can be specified multiple times. B Currently, only SGML Open Catalog format is supported. XML Catalog support is not implemented (yet). =item --debug Extra debugging output. This option can be specified multiple times to increase the amount of output. Debugging output is sent to standard error. =item --declaration Specify the SGML declaration. The SGML declaration is parsed to determine the type of DTD being parsed, XML or SGML. The key parts of the SGML declaration examined are the NAMECASE and CHARSET directives to determine the DTD type. If no SGML declaration is available, the C<--xml>, C<--namecase-general>, and C<--namecase-entity> options can be used. =item --output Output file. If not specified, standard output is used. =item --preserve Preserve parameter entity declaration denoted by . This option can be specified multiple times. Note, if matches any portion of a parameter entity, the parameter entity declaration will be preserved. =item --verbose =item --noverbose Print parsing progress. By default, this option is enabled. Verbose output is sent to standard error. If C<--debug> is specified, then this option is automatically enabled. =item --version Print version and synopsis. =item --help Print synopsis and options available. =item --man Print manual page. =back =head1 SEE ALSO L See L for an overview of the DTDParse package. =head1 PREREQUISITES B, B, B =head1 AVAILABILITY EIE =head1 AUTHORS Originally developed by Norman Walsh, Endw@nwalsh.comE. Earl Hood Eearl@earlhood.comE picked up support and maintenance. =head1 COPYRIGHT AND LICENSE See L for copyright and license information.