package XML::Parsepp;
use 5.014;
use strict;
use warnings;
use Carp;
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw();
our @EXPORT_OK = qw();
our $VERSION = '0.04';
sub new {
my $class = shift;
my %HParam = @_;
my $self = { _Setters => {}, _Dupatt => '' };
if ($HParam{Handlers}) {
$self->{_Setters} = $HParam{Handlers};
}
if (defined $HParam{dupatt}) {
my $cstr = $HParam{dupatt};
unless ($cstr =~ m{\A [\x{21}-\x{bf}]* \z}xms) {
croak("Error-0005: invalid dupatt");
}
if ($cstr =~ m{[0-9A-Za-z"']}xms) {
croak("Error-0006: invalid dupatt");
}
$self->{_Dupatt} = $cstr;
}
bless $self, $class;
}
sub setHandlers {
my $self = shift;
%{$self->{_Setters}} = (%{$self->{_Setters}}, @_);
}
sub parsefile {
my $self = shift;
my ($inpname) = @_;
open my $ifh, '<', $inpname or croak("Error-0010: Can't open < '$inpname' because $!");
$self->_process_handle($ifh);
close $ifh;
}
sub parse {
my $self = shift;
my ($pitem) = @_;
if (ref($pitem) eq 'GLOB') {
$self->_process_handle($pitem);
}
else {
open my $ifh, '<', \$pitem or croak("Error-0020: Can't open < \\'...' because $!");
$self->_process_handle($ifh);
close $ifh;
}
}
sub _process_handle {
my $self = shift;
my ($fh) = @_;
my $ExpatNB = $self->parse_start
or croak("Error-0030: Can't XML::Parsepp->parse_start");
while (1) {
# Here is the all important reading of a chunk of XML-data from the filehandle...
read($fh, my $buf, 4096);
# We leave immediately as soon as there is no more data left (EOF)
last if $buf eq '';
# and here is the all important parsing of that chunk:
# and we could get exceptions thrown here if the XML is invalid...
$ExpatNB->parse_more($buf);
}
$ExpatNB->parse_done;
}
sub parse_start {
my $self = shift;
my $ExpatNB = {
_Setters => $self->{_Setters},
_Dupatt => $self->{_Dupatt},
_Text => '',
_Action => 'C', # DEFACT: 'C' = character data
_Stage => 1, # DEFSTA: 1 = XMLDecl, 2 = DTD, 3 = StartTag/EndTag, 4 = Rest
_QChar => '',
_ItemCount => 0,
_DoctCount => 0,
_Stack => [],
_Scount => 0,
_Seen => {},
_DocOpen => 0,
_Read_Bytes => 2,
_Read_Lines => 1,
_Read_Cols => 2,
# Structure of '_Var':
# ====================
# L => a simple replacement character
# F => $system is a file name, the content of which will be processed
# T => $value is a replacement text
_Var => {
'amp' => [L => q{&}],
'lt' => [L => q{<}],
'gt' => [L => q{>}],
'quot' => [L => q{"}],
'apos' => [L => q{'}],
},
};
%$ExpatNB = (%$ExpatNB, @_);
bless $ExpatNB, 'XML::Parsepp::ExpatNB';
$ExpatNB->_emit_Init;
return $ExpatNB;
}
package XML::Parsepp::ExpatNB;
our $version = '0.04';
use Carp;
use File::Spec;
sub regexp_pattern {
my ($fl, $pn) = $_[0] =~ m{\A \( \? ([\w\^\-]*) : (.*?) \) \z}xms
or die "Error-0040: Internal Error - Can't disassemble quoted regexp = '$_[0]'";
return ($pn, $fl);
}
sub negated {
my ($pattern, $flags) = regexp_pattern($_[0]);
my ($caret, $class) =
$pattern =~ m{\A \[ (\^?) (.*?) \] \z}xms
or die "Error-0050: Internal Error - Can't parse regexp: $_[0] ==> (pattern = '$pattern', flags = '$flags')";
my $neg_caret = $caret eq '^' ? '' : '^';
my $neg_regexp = qr{[$neg_caret$class]}xms;
return $neg_regexp;
}
my $rx_unc_tok = qr/["']/xms;
my $rx_tok_tok = qr/[!\$&\/;<=\@\\\^`\{\}~\x7f]/xms;
my $rx_syn_tok = qr/[\#\(\]]/xms;
my $rx_tok_syn = qr/[%)*+?]/xms;
my $rx_syn_syn = qr/[,\-.\w:\[|]/xms;
my $ng_unc_tok = negated($rx_unc_tok);
my $ng_tok_tok = negated($rx_tok_tok);
my $ng_syn_tok = negated($rx_syn_tok);
my $ng_tok_syn = negated($rx_tok_syn);
my $ng_syn_syn = negated($rx_syn_syn);
sub parse_more {
my $self = shift;
$self->_more(0, '', $_[0]);
}
sub _more {
my $self = shift;
my $level = shift;
my $hist = shift;
my $buffer_text = $self->{_Text}.$_[0]; # Take whatever there was before and add the new parse_more parameter
$self->{_Text} = '';
my @buffer_stack = @{$self->{_Stack}};
$self->{_Stack} = [];
if (length($buffer_text) > 100_000) {
$self->crknum("Error-0060: Internal Error - Buffer overflow");
}
my $buffer_action = $self->{_Action};
my $buffer_breakout = 0;
until ($buffer_breakout or $buffer_text eq '') {
if ($buffer_action eq 'C') { # DEFACT: 'C' = character data
if ($self->{_Stage} <= 2) {
my ($emit, $ch, $remainder);
if ($buffer_text =~ m{\A (\s*) (\S) (.*) \z}xms) {
($emit, $ch, $remainder) = ($1, $2, $3);
}
elsif ($buffer_text =~ m{\A \s* \z}xms) {
($emit, $ch, $remainder) = ($buffer_text, '', '');
}
else {
$self->crknum("Error-0070: Internal Error - Can't parse buffer_text = '$buffer_text'");
}
$self->_emit_Char($emit);
$self->_update_ctr($emit) if $level == 0;
if ($ch eq '') {
$buffer_text = '';
$buffer_breakout = 1;
}
elsif ($ch eq '<') {
$buffer_text = $ch.$remainder;
$buffer_action = '<'; # DEFACT: '<' = anything that starts with '<'
next;
}
elsif ($ch eq ']' and $self->{_DocOpen} > 0) {
$buffer_text = $ch.$remainder;
$buffer_action = ']'; # DEFACT: ']' = anything that starts with ']'
next;
}
elsif ($ch eq q{'} or $ch eq q{"}) {
$self->_update_ctr($ch) if $level == 0;
$buffer_text = $remainder;
$self->{_QChar} = $ch;
$buffer_action = 'F'; # DEFACT: 'F' = find quote character $self->{_QChar}
next;
}
elsif ($ch eq '>') {
$self->crknum("Error-0080: syntax error");
}
elsif ($ch =~ $rx_syn_syn) {
$self->_update_ctr($ch) if $level == 0;
$buffer_text = $remainder;
$buffer_action = 'G'; # DEFACT: 'G' = find word delimited by white-space
next;
}
elsif ($ch =~ $rx_syn_tok) {
$self->crknum("Error-0090: syntax error");
}
else {
$self->crknum("Error-0100: not well-formed (invalid token)");
}
}
else {
if ($buffer_text =~ m{\A ([^<&]*) ([<&]) (.*) \z}xms) {
my ($emit, $ch, $remainder) = ($1, $2, $3);
$self->_emit_Char($emit);
$self->_update_ctr($emit) if $level == 0;
$buffer_text = $ch.$remainder;
$buffer_action = $ch; # DEFACT: '<' = anything that starts with '<' or '&' = anything that starts with '&'
next;
}
$self->_emit_Char($buffer_text);
$self->_update_ctr($buffer_text) if $level == 0;
$buffer_text = '';
$buffer_breakout = 1;
next;
}
}
elsif ($buffer_action eq 'F') {
my ($emit, $ch, $remainder);
if ($self->{_QChar} eq q{'}) {
if ($buffer_text =~ m{\A ([^']*) (') (.*) \z}xms) {
($emit, $ch, $remainder) = ($1, $2, $3);
}
else {
($emit, $ch, $remainder) = ($buffer_text, '', '');
}
}
elsif ($self->{_QChar} eq q{"}) {
if ($buffer_text =~ m{\A ([^"]*) (") (.*) \z}xms) {
($emit, $ch, $remainder) = ($1, $2, $3);
}
else {
($emit, $ch, $remainder) = ($buffer_text, '', '');
}
}
else {
$self->crknum("Error-0110: Internal Error - invalid QChar = '".$self->{_QChar}."'");
}
$self->_update_ctr($emit) if $level == 0;
if ($ch eq '') {
$buffer_text = '';
$buffer_breakout = 1;
next;
}
else {
$self->crknum("Error-0120: not well-formed (invalid token)");
}
}
elsif ($buffer_action eq 'G') {
my ($emit, $ch, $remainder);
if ($buffer_text =~ m{\A (\S*) (\s) (.*) \z}xms) {
($emit, $ch, $remainder) = ($1, $2, $3);
}
else {
($emit, $ch, $remainder) = ($buffer_text, '', '');
}
$self->_update_ctr($emit) if $level == 0;
if ($emit =~ m{($ng_syn_syn)}xms) {
my $out = $1;
if ($out =~ $rx_tok_syn or $out eq '>') {
$self->crknum("Error-0130: syntax error");
}
else {
$self->crknum("Error-0140: not well-formed (invalid token)");
}
}
else {
if ($ch eq '') {
$buffer_text = '';
$buffer_breakout = 1;
next;
}
else {
$self->crknum("Error-0150: syntax error");
}
}
}
elsif ($buffer_action eq '&') { # DEFACT: '&' = anything that starts with '&'
if ($buffer_text =~ m{\A . ([^<;]*) ([<;]) (.*) \z}xms) {
my ($emit, $ch, $remainder) = ($1, $2, $3);
unless ($ch eq ';') {
$self->crknum("Error-0160: not well-formed (invalid token)");
}
$self->_emit_Amp($level, $hist, '&'.$emit.';');
$self->_update_ctr('&'.$emit.';') if $level == 0;
$buffer_text = $remainder;
$buffer_action = 'C'; # DEFACT: 'C' = character data
next;
}
$buffer_breakout = 1;
next;
}
elsif ($buffer_action eq '<') { # DEFACT: '<' = anything that starts with '<'
if (length($buffer_text) < 3) {
$buffer_breakout = 1;
next;
}
my $c1 = substr($buffer_text, 0, 1);
my $c2 = substr($buffer_text, 1, 1);
my $c3 = substr($buffer_text, 2, 1);
if ($c2 eq '!' and $c3 eq '-') {
$buffer_action = '!'; # DEFACT: '' a comment
next;
}
if ($c2 eq '!' and $c3 eq '[') {
$buffer_action = 'A'; # DEFACT: '' a CDATA section
next;
}
if ($c2 eq '!' and $c3 =~ m{\w}xms) {
$buffer_action = 'D'; # DEFACT: '' a DTD section (DOCTYPE, ELEMENT, ATTLIST, etc...)
next;
}
if ($c2 =~ m{[,\-.\w:\[|]}xms) {
$buffer_action = 'S'; # DEFACT: 'S' = start tag
next;
}
if ($c2 eq '/') {
$buffer_action = 'E'; # DEFACT: 'E' = end tag
next;
}
if ($c2 eq '?') {
$buffer_action = '?'; # DEFACT: '?' = processing instruction
next;
}
$self->crknum("Error-0170: not well-formed (invalid token)");
}
elsif ($buffer_action eq '!') { # DEFACT: '' a comment
if (length($buffer_text) < 4) {
$buffer_breakout = 1;
next;
}
my $prefix = substr($buffer_text, 0, 4);
unless ($prefix eq ') (.*) \z}xms) {
my ($emit, $remainder) = ($1, $2);
$self->_emit_Comment($emit);
$self->_update_ctr($emit) if $level == 0;
$buffer_text = $remainder;
$buffer_action = 'C'; # DEFACT: 'C' = character data
next;
}
$buffer_breakout = 1;
next;
}
elsif ($buffer_action eq 'A') { # DEFACT: '' beginning of a CDATA section
if (length($buffer_text) < 9) {
$buffer_breakout = 1;
next;
}
my $prefix = substr($buffer_text, 0, 9);
my $remainder = substr($buffer_text, 9);
unless ($prefix eq 'crknum("Error-0190: not well-formed (invalid token)");
}
$self->_emit_Cdatastart;
$self->_update_ctr($prefix) if $level == 0;
$buffer_text = $remainder;
$buffer_action = 'B'; # DEFACT: 'B' = '' text of a CDATA section
next;
}
elsif ($buffer_action eq 'B') { # DEFACT: 'B' = '' text of a CDATA section
if ($buffer_text =~ m{\A (.*?) (\]\]>) (.*) \z}xms) {
my ($emit, $suffix, $remainder) = ($1, $2, $3);
$self->_emit_Char($emit);
$self->_update_ctr($emit) if $level == 0;
$self->_emit_Cdataend;
$self->_update_ctr($suffix) if $level == 0;
$buffer_text = $remainder;
$buffer_action = 'C'; # DEFACT: 'C' = character data
next;
}
$self->_emit_Char($buffer_text);
$self->_update_ctr($buffer_text) if $level == 0;
$buffer_text = '';
$buffer_breakout = 1;
next;
}
# pour identifier les différents possibilités de DTD (DOCTYPE, ELEMENT, ATTLIST, etc...), voir: http://www.u-picardie.fr/~ferment/xml/xml02.html
elsif ($buffer_action eq 'D') { # DEFACT: '' a DTD section (DOCTYPE, ELEMENT, ATTLIST, etc...)
my $finpos = -1;
pos($buffer_text) = 0;
while ($buffer_text =~ m{\G \s* (?: ([^'"\s]+) | ' [^']* ' | " [^"]* " ) }xmsgc) {
if (defined $1) {
my $mp = $-[1];
my $fragment = $1;
if ($fragment =~ m{[>\[]}xms) {
$finpos = $mp + $-[0];
last;
}
}
}
if ($finpos != -1) {
my $terminal = substr $buffer_text, $finpos, 1;
unless ($terminal eq '>' or $terminal eq '[') {
$self->crknum("Error-0200: Internal Error - found terminal char ('$terminal') not equal to ('>', '[')");
}
my $emit = substr($buffer_text, 0, $finpos + 1);
$self->_emit_Dtd($emit);
$self->_update_ctr($emit) if $level == 0;
$buffer_text = substr($buffer_text, $finpos + 1);
$buffer_action = 'C'; # DEFACT: 'C' = character data
next;
}
$buffer_breakout = 1;
next;
}
elsif ($buffer_action eq ']') { # DEFACT: ']' = closing doctype parenthesis ']>'
if (length($buffer_text) < 2) {
$buffer_breakout = 1;
next;
}
unless ($self->{_DocOpen}) {
$self->crknum("Error-0210: Internal Error - Can't close a closed Doctype");
}
unless ($buffer_text =~ m{\A (\] \s* >) (.*) \z}xms) {
$self->crknum("Error-0220: not well-formed (invalid token)");
}
my ($emit, $remainder) = ($1, $2);
$self->_emit_CloseDoc($emit);
$self->_update_ctr($emit) if $level == 0;
$buffer_text = $remainder;
$buffer_action = 'C'; # DEFACT: 'C' = character data
next;
}
elsif ($buffer_action eq 'S') { # DEFACT: 'S' = start tag
my $finpos = -1;
pos($buffer_text) = 0;
while ($buffer_text =~ m{\G \s* (?: ([^'"\s]+) | ' [^']* ' | " [^"]* " ) }xmsgc) {
if (defined $1) {
my $mp = $-[1];
my $fragment = $1;
if ($fragment =~ m{>}xms) {
$finpos = $mp + $-[0];
last;
}
}
}
if ($finpos != -1) {
my $emit = substr($buffer_text, 0, $finpos + 1);
$self->_emit_Start($emit, \@buffer_stack, $level);
$self->_update_ctr($emit) if $level == 0;
$buffer_text = substr($buffer_text, $finpos + 1);
$buffer_action = 'C'; # DEFACT: 'C' = character data
next;
}
$buffer_breakout = 1;
next;
}
elsif ($buffer_action eq 'E') { # DEFACT: 'E' = end tag
if ($buffer_text =~ m{\A ([^>]* [>]) (.*) \z}xms) {
my ($emit, $remainder) = ($1, $2);
$self->_emit_End($emit, \@buffer_stack, $hist);
$self->_update_ctr($emit) if $level == 0;
$buffer_text = $remainder;
$buffer_action = 'C'; # DEFACT: 'C' = character data
next;
}
$buffer_breakout = 1;
next;
}
elsif ($buffer_action eq '?') { # DEFACT: '?' = processing instruction
if ($buffer_text =~ m{\A ([^>]* [>]) (.*) \z}xms) {
my ($emit, $remainder) = ($1, $2);
$self->_emit_Proc($emit);
$self->_update_ctr($emit) if $level == 0;
$buffer_text = $remainder;
$buffer_action = 'C'; # DEFACT: 'C' = character data
next;
}
$buffer_breakout = 1;
next;
}
else {
$self->crknum("Error-0230: Internal Error - invalid buffer_action = '$buffer_action'");
}
}
$self->{_Text} = $buffer_text;
$self->{_Stack} = [@buffer_stack];
$self->{_Action} = $buffer_action;
}
sub _emit_Init {
my $self = shift;
my $cb_Init = $self->{_Setters}{Init};
if ($cb_Init) {
$cb_Init->($self);
}
}
sub _emit_Final {
my $self = shift;
my $cb_Final = $self->{_Setters}{Final};
if ($cb_Final) {
$cb_Final->($self);
}
}
sub _emit_Amp {
my $self = shift;
my $level = shift;
my $hist = shift;
my ($ampersand) = @_;
my ($var) = $ampersand =~ m{\A & ([^&;]+) ; \z}xms
or $self->crknum("Error-0240: Internal Error - Can't parse ampersand = '$ampersand'");
if ($var =~ m{\A \# (\d+) \z}xms) {
my $value = chr($1);
$self->_plausi('C'); # PLAUSI ==> 'C' = Character Data
$self->{_ItemCount}++;
my $cb_Char = $self->{_Setters}{Char};
if ($cb_Char) {
$cb_Char->($self, $value);
}
}
else {
my $rhs = $self->{_Var}{$var};
unless (defined $rhs) {
if ($level == 0) {
$self->crknum("Error-0250: undefined entity");
}
else {
$self->crknum("Error-0260: error in processing external entity reference");
}
}
my ($code, $value) = @$rhs;
# Structure of ($code, $value):
# =============================
# L => a simple replacement character
# F => $system is a file name, the content of which will be processed
# T => $value is a replacement text
if ($code eq 'L') {
$self->_plausi('C'); # PLAUSI ==> 'C' = Character Data
$self->{_ItemCount}++;
my $cb_Char = $self->{_Setters}{Char};
if ($cb_Char) {
$cb_Char->($self, $value);
}
}
elsif ($code eq 'F') {
if ($self->{_Seen}{$var}) {
$self->crknum("Error-0270: error in processing external entity reference");
}
$self->{_Seen}{$var} = 1;
my $cb_Exen = $self->{_Setters}{ExternEnt};
if ($cb_Exen) {
# ExternEnt (Expat, Base, Sysid, Pubid)
my $buf = $cb_Exen->($self, undef, $value, undef);
$self->_more($level + 1, $hist.'X', $buf);
my $cb_Exef = $self->{_Setters}{ExternEntFin};
if ($cb_Exef) {
# ExternEntFin (Expat)
my $buf = $cb_Exef->($self);
}
unless ($self->{_Text} eq '') {
$self->crknum("Error-0280: error in processing external entity reference");
}
if (@{$self->{_Stack}}) {
$self->crknum("Error-0290: error in processing external entity reference");
}
}
else {
my $filepath = File::Spec->rel2abs($value);
open my $ifh, '<', $value
or $self->crknum("Error-0300: Handler couldn't resolve external entity\n"."404 File `$filepath' does not exist");
while (1) {
read($ifh, my $buf, 4096);
last if $buf eq '';
$self->_more($level + 1, $hist.'F', $buf);
}
close $ifh;
unless ($self->{_Text} eq '') {
$self->crknum("Error-0310: error in processing external entity reference");
}
if (@{$self->{_Stack}}) {
$self->crknum("Error-0320: error in processing external entity reference");
}
}
$self->{_Seen}{$var} = 0;
}
elsif ($code eq 'T') {
if ($self->{_Seen}{$var}) {
$self->crknum("Error-0330: recursive entity reference");
}
$self->{_Seen}{$var} = 1;
$self->_more($level + 1, $hist.'T', $value);
unless ($self->{_Text} eq '') {
$self->crknum("Error-0340: unclosed token");
}
if (@{$self->{_Stack}}) {
$self->crknum("Error-0350: asynchronous entity");
}
$self->{_Seen}{$var} = 0;
}
else {
$self->crknum("Error-0360: Internal Error - Found invalid code '$code' not equal to ('F', 'L', 'T')");
}
}
}
sub _emit_Char {
my $self = shift;
my ($emit) = @_;
$self->_plausi('C'); # PLAUSI ==> 'C' = Character Data
my $default = 0;
unless ($self->{_Stage} == 3) {
$default = 1;
if ($emit =~ m{\S}xms) {
if ($self->{_Stage} == 4) {
$self->crknum("Error-0370: junk after document element");
}
else {
$self->crknum("Error-0380: Internal Error - non-space data");
}
}
}
if ($default) {
unless ($emit eq '') {
$self->{_ItemCount}++;
my $cb_Default = $self->{_Setters}{Default};
if ($cb_Default) {
# Default (Expat, String)
$cb_Default->($self, $emit);
}
}
}
else {
pos($emit) = 0;
while ($emit =~ m{\G (?: ([^\n]+) | ([\n]) ) }xmsgc) {
my $fragment;
if (defined $1) {
$fragment = $1;
}
elsif (defined $2) {
$fragment = $2;
}
else {
$self->crknum("Error-0390: Internal Error - inconsistent result from regexp");
}
unless ($fragment eq '') {
$self->{_ItemCount}++;
my $cb_Char = $self->{_Setters}{Char};
if ($cb_Char) {
$cb_Char->($self, $fragment);
}
}
}
unless ($emit =~ m{\G (.*) \z}xms) {
$self->crknum("Error-0400: Internal Error - Can't find regexp rest in CHAR");
}
my $rest = $1;
if ($rest ne '') {
$self->crknum("Error-0410: Internal Error - Invalid rest ($rest) in CHAR regexp");
}
}
}
sub _emit_Start {
my $self = shift;
my ($emit, $bstack, $level) = @_;
my ($elem, $param, $term) = $emit =~ m{\A < \s* ([,\-.\w:\[|]+) (.*?) (/?) > \z}xms
or $self->crknum("Error-0420: Internal Error - Can't decompose start = '$emit'");
my @attr;
my %att_hash;
pos($param) = 0;
while ($param =~ m{\G \s* ([,\-.\w:\[|]+) \s* = \s* (?: ' ([^']*) ' | " ([^"]*) " ) }xmsgc) {
my $def_var = $1;
my $def_txt;
if (defined $2) {
$def_txt = $2;
}
elsif (defined $3) {
$def_txt = $3;
}
else {
$self->crknum("Error-0430: Internal Error - Can't match any param");
}
if ($def_txt =~ m{<}xms) {
$self->crknum("Error-0440: not well-formed (invalid token)");
}
$def_txt =~ s{\n}' 'xmsg;
my $def_res = '';
pos($def_txt) = 0;
while ($def_txt =~ m{\G ([^&]*) & ([^&;]+) ; }xmsgc) {
$def_res .= $1;
my $var = $2;
my $rhs = $self->{_Var}{$var};
unless (defined $rhs) {
$self->crknum("Error-0450: undefined entity");
}
my ($code, $value) = @$rhs;
# Structure of ($code, $value):
# =============================
# L => a simple replacement character
# F => $system is a file name, the content of which will be processed
# T => $value is a replacement text
unless ($code eq 'L') {
$self->crknum("Error-0460: reference to external entity in attribute");
}
$def_res .= $value;
}
unless ($def_txt =~ m{\G (.*) \z}xms) {
$self->crknum("Error-0470: Internal Error - Can't find regexp rest in ELEMENT");
}
my $rest = $1;
if ($rest =~ m{&}xms) {
$self->crknum("Error-0480: not well-formed (invalid token)");
}
$def_res .= $rest;
if (defined $att_hash{$def_var}) {
if ($self->{_Dupatt} eq '') {
$self->crknum("Error-0485: duplicate attribute");
}
$att_hash{$def_var} .= $self->{_Dupatt}.$def_res;
}
else {
$att_hash{$def_var} = $def_res;
}
push @attr, $def_var, $def_res;
}
unless ($param =~ m{\G (.*) \z}xms) {
$self->crknum("Error-0490: Internal Error - Can't find regexp rest in START");
}
my $rest = $1;
if ($rest =~ m{\S}xms) {
if ($level == 0) {
$self->crknum("Error-0500: not well-formed (invalid token)");
}
else {
$self->crknum("Error-0510: error in processing external entity reference");
}
}
unless ($self->{_Dupatt} eq '') {
@attr = map { $_ => $att_hash{$_} } sort(keys %att_hash);
}
$self->_plausi('S'); # PLAUSI ==> 'S' = Start Tag
$self->{_Scount}++;
push @$bstack, $elem;
$self->{_ItemCount}++;
my $cb_Start = $self->{_Setters}{Start};
if ($cb_Start) {
# Start (Expat, Element [, Attr, Val [,...]])
$cb_Start->($self, $elem, @attr);
}
if ($term eq '/') {
if ($self->{_Scount} < 1) {
$self->crknum("Error-0520: Internal Error - Underflow in Scount");
}
$self->{_Scount}--;
my $ele_from_stack = pop @$bstack;
unless (defined $ele_from_stack) {
$self->crknum("Error-0530: Internal Error - Underflow in stack");
}
unless ($elem eq $ele_from_stack) {
$self->crknum("Error-0540: Internal Error - Mismatch of Start- and End-tag, start = '$ele_from_stack', end = '$elem'");
}
$self->_plausi('E'); # PLAUSI ==> 'E' = End Tag
$self->{_ItemCount}++;
my $cb_End = $self->{_Setters}{End};
if ($cb_End) {
# End (Expat, Element)
$cb_End->($self, $elem);
}
}
elsif ($term ne '') {
$self->crknum("Error-0550: Internal Error - in START found closing tag '$term'");
}
}
sub _emit_End {
my $self = shift;
my ($emit, $bstack, $hist) = @_;
my ($elem) = $emit =~ m{\A < \s* / \s* ([,\-.\w:\[|]+) \s* > \z}xms
or $self->crknum("Error-0560: not well-formed (invalid token)");
if ($self->{_Scount} < 1) {
$self->crknum("Error-0570: not well-formed (invalid token)");
}
$self->{_Scount}--;
my $ele_from_stack = pop @$bstack;
unless (defined $ele_from_stack) {
if ($hist =~ m{F}xms) {
$self->crknum("Error-0580: error in processing external entity reference");
}
else {
$self->crknum("Error-0590: asynchronous entity");
}
}
unless ($elem eq $ele_from_stack) {
$self->crknum("Error-0600: mismatched tag");
}
$self->_plausi('E'); # PLAUSI ==> 'E' = End Tag
$self->{_ItemCount}++;
my $cb_End = $self->{_Setters}{End};
if ($cb_End) {
# End (Expat, Element)
$cb_End->($self, $elem);
}
}
sub _emit_Proc {
my $self = shift;
my ($emit) = @_;
my ($target, $data) = $emit =~ m{\A <\? ([,\-.\w:\[|]+) \s* (.*) \?> \z}xms
or $self->crknum("Error-0610: not well-formed (invalid token)");
if ($target =~ m{\A xml}xmsi) {
unless ($self->{_ItemCount} == 0) {
$self->crknum("Error-0620: XML or text declaration not at start of entity");
}
my @attr;
pos($data) = 0;
while ($data =~ m{\G \s* ([,\-.\w:\[|]+) \s* = \s* (?: ' ([^']*) ' | " ([^"]*) " ) }xmsgc) {
if (defined $2) {
push @attr, [$1, $2];
}
elsif (defined $3) {
push @attr, [$1, $3];
}
else {
$self->crknum("Error-0630: Internal Error - Can't match any param");
}
}
unless ($data =~ m{\G (.*) \z}xms) {
$self->crknum("Error-0640: Internal Error - Can't find regexp rest in PROC");
}
my $rest = $1;
if ($rest =~ m{\S}xms) {
$self->crknum("Error-0650: XML declaration not well-formed");
}
#
my ($ver, $enc, $stand);
for my $at (@attr) {
if ($at->[0] eq 'version') {
if (defined $ver) {
$self->crknum("Error-0660: XML declaration not well-formed");
}
$ver = $at->[1];
}
elsif ($at->[0] eq 'encoding') {
if (defined $enc) {
$self->crknum("Error-0670: XML declaration not well-formed");
}
$enc = $at->[1];
}
elsif ($at->[0] eq 'standalone') {
if (defined $stand) {
$self->crknum("Error-0680: XML declaration not well-formed");
}
if ($at->[1] eq 'yes') {
$stand = '1';
}
elsif ($at->[1] eq 'no') {
$stand = '';
}
else {
$self->crknum("Error-0690: XML declaration not well-formed");
}
}
else {
$self->crknum("Error-0700: XML declaration not well-formed");
}
}
unless (defined $ver) {
$self->crknum("Error-0710: XML declaration not well-formed");
}
$self->_plausi('X'); # PLAUSI ==> 'X' = XML Declaration
$self->{_ItemCount}++;
my $cb_Decl = $self->{_Setters}{XMLDecl};
if ($cb_Decl) {
# XMLDecl (Expat, Version, Encoding, Standalone)
$cb_Decl->($self, $ver, $enc, $stand);
}
}
else {
$self->_plausi('P'); # PLAUSI ==> 'P' = Processing Instruction
$self->{_ItemCount}++;
my $cb_Proc = $self->{_Setters}{Proc};
if ($cb_Proc) {
# Proc (Expat, Target, Data)
$cb_Proc->($self, $target, $data);
}
}
}
sub _emit_Comment {
my $self = shift;
my ($emit) = @_;
my ($comment) = $emit =~ m{\A \z}xms or
$self->crknum("Error-0720: Internal Error - Can't decompose comment '$emit'");
$self->_plausi('!'); # PLAUSI ==> '!' = comment
$self->{_ItemCount}++;
my $cb_Comment = $self->{_Setters}{Comment};
if ($cb_Comment) {
$cb_Comment->($self, $comment);
}
}
sub _emit_Cdatastart {
my $self = shift;
$self->_plausi('A'); # PLAUSI ==> 'A' = CData
my $cb_CdataStart = $self->{_Setters}{CdataStart};
if ($cb_CdataStart) {
$cb_CdataStart->($self);
}
}
sub _emit_Cdataend {
my $self = shift;
$self->_plausi('A'); # PLAUSI ==> 'A' = CData
my $cb_CdataEnd = $self->{_Setters}{CdataEnd};
if ($cb_CdataEnd) {
$cb_CdataEnd->($self);
}
}
sub _emit_CloseDoc {
my $self = shift;
my ($emit) = @_;
$emit =~ m{\A \] \s* > \z}xms
or $self->crknum("Error-0730: Internal Error - Invalid closedoc: '$emit'");
unless ($self->{_DocOpen}) {
$self->crknum("Error-0740: Internal Error - closedoc found without DocOpen");
}
$self->_plausi('F'); # PLAUSI ==> 'F' = DocTypeFin
$self->{_DocOpen} = 0;
my $cb_DoctypeFin = $self->{_Setters}{DoctypeFin};
if ($cb_DoctypeFin) {
# DoctypeFin (Expat)
$cb_DoctypeFin->($self);
}
}
sub _emit_Dtd {
my $self = shift;
my ($emit) = @_;
if ($self->{_Stage} > 2) {
$self->crknum("Error-0750: not well-formed (invalid token)");
}
my ($type, $data, $term) = $emit =~ m{\A ]) \z}xms
or $self->crknum("Error-0760: not well-formed (invalid token)");
my @elist;
pos($data) = 0;
while ($data =~ m{\G \s* (?: ([^'"\(\s]+) | ' ([^']*) ' | " ([^"]*) " | \( ([^\)]*) \) ) }xmsgc) {
if (defined $1) {
push @elist, ['B' => $1];
}
elsif (defined $2) {
push @elist, ['Q' => $2, q{'}];
}
elsif (defined $3) {
push @elist, ['Q' => $3, q{"}];
}
elsif (defined $4) {
my $paran = $4;
$paran =~ s{\s}''xmsg;
push @elist, ['P' => $paran];
}
else {
$self->crknum("Error-0770: Internal Error - regexp undefined");
}
}
unless ($data =~ m{\G (.*) \z}xms) {
$self->crknum("Error-0780: Internal Error - Can't find regexp rest");
}
my $rest = $1;
if ($rest =~ m{\S}xms) {
$self->crknum("Error-0790: syntax error");
}
if ($type eq 'DOCTYPE') {
$self->_parse_Doctype(\@elist, $term);
}
elsif ($type eq 'ENTITY') {
$self->_parse_Entity(\@elist);
}
elsif ($type eq 'ELEMENT') {
$self->_parse_Element(\@elist);
}
elsif ($type eq 'ATTLIST') {
$self->_parse_Attlist(\@elist);
}
elsif ($type eq 'NOTATION') {
$self->_parse_Notation(\@elist);
}
else {
$self->crknum("Error-0800: syntax error");
}
unless ($type eq 'DOCTYPE' or $term eq '>') {
$self->crknum("Error-0810: syntax error");
}
}
sub _parse_Doctype {
my $self = shift;
my ($plist, $terminal) = @_;
$self->{_DoctCount}++;
unless ($self->{_DoctCount} == 1) {
$self->crknum("Error-0820: syntax error");
}
#
# 'DOCT nam=[racine], sys=[URI-de-la-dtd], pub=[*undef*], int=[]'
# 'DOCF'
#
# 'DOCT nam=[svg], sys=[http://www.w3.org/Graphics/SVG/SVG-19991203.dtd], pub=[-//W3C//DTD SVG December 1999//EN], int=[]'
# 'DOCF'
# int=1
# 'DOCT nam=[dialogue], sys=[*undef*], pub=[*undef*], int=[1]'
my $param0 = shift(@$plist);
unless (defined $param0) {
$self->crknum("Error-0830: Internal Error - Not enough elements in DOCTYPE");
}
unless ($param0->[0] eq 'B') {
$self->crknum("Error-0840: syntax error");
}
my $name = $param0->[1];
my $intern = $terminal eq '[' ? '1' : '';
my ($system, $public);
my $param1 = shift(@$plist);
if (defined $param1) {
unless ($param1->[0] eq 'B') {
$self->crknum("Error-0850: syntax error");
}
my $syspub;
if ($param1->[1] eq 'SYSTEM') {
$syspub = 'S';
}
elsif ($param1->[1] eq 'PUBLIC') {
$syspub = 'P';
}
else {
$self->crknum("Error-0860: syntax error");
}
my $param2 = shift(@$plist);
unless (defined $param2) {
$self->crknum("Error-0870: syntax error");
}
unless ($param2->[0] eq 'Q') {
$self->crknum("Error-0880: syntax error");
}
if ($syspub eq 'S') {
$system = $param2->[1];
}
else {
$public = $param2->[1];
}
my $param3 = shift(@$plist);
if (defined $param3) {
unless ($param3->[0] eq 'Q') {
$self->crknum("Error-0890: syntax error");
}
if ($syspub eq 'S') {
$public = $param3->[1];
}
else {
$system = $param3->[1];
}
}
}
if (defined $public) {
if ($public =~ m{[\]\[\\]}xms) {
$self->crknum("Error-0900: illegal character(s) in public id");
}
}
if (@$plist) {
$self->crknum("Error-0910: syntax error");
}
if ($self->{_DocOpen}) {
$self->crknum("Error-0920: Internal Error - DOC is open");
}
$self->_plausi('D'); # PLAUSI ==> 'D' = DocType
$self->{_DocOpen} = 1;
$self->{_ItemCount}++;
my $cb_Doctype = $self->{_Setters}{Doctype};
if ($cb_Doctype) {
# Doctype (Expat, Name, Sysid, Pubid, Internal)
$cb_Doctype->($self, $name, $system, $public, $intern);
}
unless ($intern eq '1') {
$self->_plausi('F'); # PLAUSI ==> 'F' = DocTypeFin
$self->{_DocOpen} = 0;
my $cb_DoctypeFin = $self->{_Setters}{DoctypeFin};
if ($cb_DoctypeFin) {
# DoctypeFin (Expat)
$cb_DoctypeFin->($self);
}
}
}
sub _parse_Entity {
my $self = shift;
my ($plist) = @_;
#
# 'ENTT nam=[prl], val=[madame pernelle], sys=[*undef*], pub=[*undef*], nda=[*undef*], isp=[*undef*]'
#
# 'ENTT nam=[dialogue_b], val=[*undef*], sys=[dialogue5b.xml], pub=[*undef*], nda=[*undef*], isp=[*undef*]'
#
# 'UNPS ent=[animation], base=[*undef*], sys=[../anim.fla], pub=[*undef*], not=[flash]',
#
# 'ENTT nam=[nom3], val=[chaine3], sys=[*undef*], pub=[*undef*], nda=[*undef*], isp=[1]',
#
# 'ENTT nam=[nom4], val=[*undef*], sys=[uri3], pub=[*undef*], nda=[*undef*], isp=[1]',
my $isparam;
if (@$plist and $plist->[0][0] eq 'B' and $plist->[0][1] eq '%') {
$isparam = '1';
shift @$plist;
}
my $param0 = shift(@$plist);
unless (defined $param0) {
$self->crknum("Error-0930: Internal Error - Not enough elements in ENTITY");
}
unless ($param0->[0] eq 'B') {
$self->crknum("Error-0940: syntax error");
}
my $name = $param0->[1];
my ($value, $val_quote, $base, $system, $sys_quote, $public, $ndata);
my $param1 = shift(@$plist);
unless (defined $param1) {
$self->crknum("Error-0950: syntax error");
}
if ($param1->[0] eq 'Q') {
$value = $param1->[1];
$val_quote = $param1->[2];
}
else {
unless ($param1->[1] eq 'SYSTEM') {
if ($param1->[1] eq 'PUBLIC') {
$self->crknum("Error-0960: syntax error");
}
else {
$self->crknum("Error-0970: not well-formed (invalid token)");
}
}
my $param2 = shift(@$plist);
unless (defined $param2) {
$self->crknum("Error-0980: syntax error");
}
unless ($param2->[0] eq 'Q') {
$self->crknum("Error-0990: syntax error");
}
$system = $param2->[1];
$sys_quote = $param2->[2];
my $param3 = shift(@$plist);
if (defined $param3) {
unless ($param3->[0] eq 'B') {
$self->crknum("Error-1000: syntax error");
}
unless ($param3->[1] eq 'NDATA') {
$self->crknum("Error-1010: syntax error");
}
my $param4 = shift(@$plist);
unless (defined $param4) {
$self->crknum("Error-1020: syntax error");
}
unless ($param4->[0] eq 'Q' or $param4->[0] eq 'B') {
$self->crknum("Error-1030: syntax error");
}
$ndata = $param4->[1];
}
}
if (@$plist) {
$self->crknum("Error-1040: syntax error");
}
unless ($self->{_DocOpen}) {
$self->crknum("Error-1050: syntax error");
}
if (defined $ndata) {
$self->_plausi('U'); # PLAUSI ==> 'U' = Unparsed
$self->{_ItemCount}++;
my $cb_Unparsed = $self->{_Setters}{Unparsed};
if ($cb_Unparsed) {
# Unparsed (Expat, Entity, Base, Sysid, Pubid, Notation)
$cb_Unparsed->($self, $name, $base, $system, $public, $ndata);
}
}
else {
if (defined $self->{_Var}{$name}) {
#~ Redefinition of '$name' --> emit 2 or 3 Default lines
my $object = defined($value) ? $val_quote.$value.$val_quote : $sys_quote.$system.$sys_quote;
$self->_plausi('T'); # PLAUSI ==> 'T' = Entity
$self->{_ItemCount}++;
my $cb_Default = $self->{_Setters}{Default};
if ($cb_Default) {
# Default (Expat, String)
$cb_Default->($self, $name);
$cb_Default->($self, $object);
unless (defined $value) {
$cb_Default->($self, '>');
}
}
}
else {
unless (defined $isparam) {
if (defined $value) {
$self->{_Var}{$name} = [T => $value]; # T => $value is a replacement text
}
else {
$self->{_Var}{$name} = [F => $system]; # F => $system is a file name, the content of which will be processed
}
}
$self->_plausi('T'); # PLAUSI ==> 'T' = Entity
$self->{_ItemCount}++;
my $cb_Entity = $self->{_Setters}{Entity};
if ($cb_Entity) {
# Entity (Expat, Name, Val, Sysid, Pubid, Ndata, IsParam)
$cb_Entity->($self, $name, $value, $system, $public, $ndata, $isparam);
}
}
}
}
sub _parse_Element {
my $self = shift;
my ($plist) = @_;
#
# 'ELEM nam=[replique], mod=[(personnage,texte)]',
#
# 'ELEM nam=[personnage], mod=[(#PCDATA)]',
my $param0 = shift(@$plist);
unless (defined $param0) {
$self->crknum("Error-1060: Internal Error - Not enough elements in ELEMENT");
}
unless ($param0->[0] eq 'B') {
$self->crknum("Error-1070: syntax error");
}
my $name = $param0->[1];
my $param1 = shift(@$plist);
unless (defined $param1) {
$self->crknum("Error-1080: syntax error");
}
unless ($param1->[0] eq 'P') {
$self->crknum("Error-1090: syntax error");
}
my $model = $param1->[1];
unless ($self->{_DocOpen}) {
$self->crknum("Error-1100: syntax error");
}
$self->_plausi('L'); # PLAUSI ==> 'L' = Element
$self->{_ItemCount}++;
my $cb_Element = $self->{_Setters}{Element};
if ($cb_Element) {
# Element (Expat, Name, Model)
$cb_Element->($self, $name, "($model)");
}
if (@$plist) {
$self->crknum("Error-1110: syntax error");
}
}
sub _parse_Attlist {
my $self = shift;
my ($plist) = @_;
#
# 'ATTL eln=[task], atn=[status], typ=[(important|normal)], def=[\'normal\'], fix=[*undef*]',
#
# 'ATTL eln=[task], atn=[status], typ=[NMTOKEN], def=[\'monthly\'], fix=[1]',
#
# 'ATTL eln=[description], atn=[xml:lang], typ=[NMTOKEN], def=[\'en\'], fix=[1]',
#
# 'ATTL eln=[code], atn=[xml:space], typ=[(default|preserve)], def=[\'preserve\'], fix=[*undef*]',
#
# 'ATTL eln=[personnage], atn=[attitude], typ=[CDATA], def=[#REQUIRED], fix=[*undef*]',
# 'ATTL eln=[personnage], atn=[geste], typ=[CDATA], def=[#IMPLIED], fix=[*undef*]',
#
# 'ATTL eln=[texte], atn=[ton], typ=[(normal|fort|faible)], def=[\'normal\'], fix=[*undef*]',
my $param0 = shift(@$plist);
unless (defined $param0) {
$self->crknum("Error-1120: Internal Error - Not enough elements in ATTLIST");
}
unless ($param0->[0] eq 'B') {
$self->crknum("Error-1130: syntax error");
}
my $name = $param0->[1];
while (@$plist) {
my $param1 = shift(@$plist);
unless (defined $param1) {
$self->crknum("Error-1140: Internal Error - Not enough elements in ATTLIST-PARAM1");
}
unless ($param1->[0] eq 'B') {
$self->crknum("Error-1150: syntax error");
}
my $attrib = $param1->[1];
my $param2 = shift(@$plist);
unless (defined $param2) {
$self->crknum("Error-1160: syntax error");
}
my $atype;
if ($param2->[0] eq 'B' and $param2->[1] eq 'NOTATION') {
my $pm2b = shift(@$plist);
unless (defined $pm2b) {
$self->crknum("Error-1170: syntax error");
}
unless ($pm2b->[0] eq 'P') {
$self->crknum("Error-1180: syntax error");
}
$atype = $param2->[1]."($pm2b->[1])";
}
else {
if ($param2->[0] eq 'B') {
$atype = $param2->[1];
}
elsif ($param2->[0] eq 'P') {
$atype = "($param2->[1])";
}
else {
$self->crknum("Error-1190: syntax error");
}
}
my $param3 = shift(@$plist);
unless (defined $param3) {
$self->crknum("Error-1200: syntax error");
}
my ($default, $fixed);
if ($param3->[0] eq 'B' and $param3->[1] eq '#FIXED') {
my $pm3b = shift(@$plist);
unless (defined $pm3b) {
$self->crknum("Error-1210: syntax error");
}
unless ($pm3b->[0] eq 'Q') {
$self->crknum("Error-1220: syntax error");
}
$default = "'$pm3b->[1]'";
$fixed = '1';
}
else {
if ($param3->[0] eq 'B') {
$default = $param3->[1];
}
elsif ($param3->[0] eq 'Q') {
$default = "'$param3->[1]'";
}
else {
$self->crknum("Error-1230: syntax error");
}
}
unless ($self->{_DocOpen}) {
$self->crknum("Error-1240: syntax error");
}
$self->_plausi('I'); # PLAUSI ==> 'I' = Attlist
$self->{_ItemCount}++;
my $cb_Attlist = $self->{_Setters}{Attlist};
if ($cb_Attlist) {
# Attlist (Expat, Elname, Attname, Type, Default, Fixed)
$cb_Attlist->($self, $name, $attrib, $atype, $default, $fixed);
}
}
}
sub _parse_Notation {
my $self = shift;
my ($plist) = @_;
#
# 'NOTA not=[name1], base=[*undef*], sys=[URI1], pub=[*undef*]',
#
# 'NOTA not=[name2], base=[*undef*], sys=[*undef*], pub=[public_ID2]',
#
# 'NOTA not=[name3], base=[*undef*], sys=[URI3], pub=[public_ID3]',
my $param0 = shift(@$plist);
unless (defined $param0) {
$self->crknum("Error-1250: Internal Error - Not enough elements in NOTATION");
}
unless ($param0->[0] eq 'B') {
$self->crknum("Error-1260: syntax error");
}
my $name = $param0->[1];
my ($base, $system, $public);
my $param1 = shift(@$plist);
if (defined $param1) {
unless ($param1->[0] eq 'B') {
$self->crknum("Error-1270: syntax error");
}
my $syspub;
if ($param1->[1] eq 'SYSTEM') {
$syspub = 'S';
}
elsif ($param1->[1] eq 'PUBLIC') {
$syspub = 'P';
}
else {
$self->crknum("Error-1280: syntax error");
}
my $param2 = shift(@$plist);
unless (defined $param2) {
$self->crknum("Error-1290: syntax error");
}
unless ($param2->[0] eq 'Q') {
$self->crknum("Error-1300: syntax error");
}
if ($syspub eq 'S') {
$system = $param2->[1];
}
else {
$public = $param2->[1];
}
my $param3 = shift(@$plist);
if (defined $param3) {
unless ($param3->[0] eq 'Q') {
$self->crknum("Error-1310: syntax error");
}
if ($syspub eq 'S') {
$public = $param3->[1];
}
else {
$system = $param3->[1];
}
}
}
unless ($self->{_DocOpen}) {
$self->crknum("Error-1320: syntax error");
}
$self->_plausi('O'); # PLAUSI ==> 'O' = Notation
$self->{_ItemCount}++;
my $cb_Notation = $self->{_Setters}{Notation};
if ($cb_Notation) {
# Notation (Expat, Notation, Base, Sysid, Pubid)
$cb_Notation->($self, $name, $base, $system, $public);
}
if (@$plist) {
$self->crknum("Error-1330: syntax error");
}
}
sub _plausi {
my $self = shift;
my ($pl) = @_;
my $tp = $pl eq 'D' || $pl eq 'F' || $pl eq 'U' || $pl eq 'I' || $pl eq 'L' || $pl eq 'O' || $pl eq 'T' ? 'DTD'
: $pl eq 'A' || $pl eq 'C' || $pl eq '!' ? 'TXT'
: $pl eq 'S' || $pl eq 'E' ? 'TAG'
: $pl eq 'P' ? 'PRC'
: $pl eq 'X' ? 'XML'
: $self->crknum("Error-1340: Internal Error - encountered plausi code = '$pl'");
my $stage = $self->{_Stage};
# PLAUSI ==> TXT - 'A' = CData
# PLAUSI ==> TXT - 'C' = Character Data
# PLAUSI ==> DTD - 'D' = DocType
# PLAUSI ==> TAG - 'E' = End Tag
# PLAUSI ==> DTD - 'F' = DocTypeFin
# PLAUSI ==> DTD - 'I' = Attlist
# PLAUSI ==> DTD - 'L' = Element
# PLAUSI ==> DTD - 'O' = Notation
# PLAUSI ==> PRC - 'P' = Processing Instruction
# PLAUSI ==> TAG - 'S' = Start Tag
# PLAUSI ==> DTD - 'T' = Entity
# PLAUSI ==> DTD - 'U' = Unparsed
# PLAUSI ==> PRC - 'X' = XML Declaration
# PLAUSI ==> TXT - '!' = comment
# Stage = 1 -->
# Stage = 2 --> DTD
# Stage = 3 --> , Character, CData,
# Stage = 4 --> after...
if ($stage == 1) {
if ($tp eq 'DTD') {
$stage = 2;
}
elsif ($pl eq 'S') {
$stage = 3;
}
}
elsif ($stage == 2) {
if ($pl eq 'S') {
$stage = 3;
}
}
if ($stage == 1) {
unless ($pl eq 'X' or $pl eq 'C' or $pl eq '!') {
$self->crknum("Error-1350: Internal Error - Found invalid callback, plausi = '$pl' at stage 1");
}
if ($pl eq 'X') {
$stage = 2;
}
}
elsif ($stage == 2) {
unless ($tp eq 'DTD' or $pl eq 'C' or $pl eq '!') {
$self->crknum("Error-1360: Internal Error - Expected 'DTD', but found '$tp', plausi = '$pl' at stage 2");
}
}
elsif ($stage == 3) {
unless ($tp eq 'TAG' or $tp eq 'PRC' or $tp eq 'TXT') {
$self->crknum("Error-1370: Internal Error - Expected 'TAG', 'PRC' or 'TXT', but found '$tp', plausi = '$pl' at stage 3");
}
if ($pl eq 'E' and $self->{_Scount} == 0) {
$stage = 4;
}
}
elsif ($stage == 4) {
unless ($pl eq 'C') {
$self->crknum("Error-1380: junk after document element");
}
}
else {
$self->crknum("Error-1390: Internal Error - invalid stage = $stage");
}
$self->{_Stage} = $stage;
}
sub _update_ctr {
my $self = shift;
my ($emit) = @_;
$self->{_Read_Bytes} += length($emit);
$self->{_Read_Lines} += $emit =~ tr{\n}{};
if($emit =~ m{\n ([^\n]*) \z}xms) {
$self->{_Read_Cols} = length($1) + 2;
}
else {
$self->{_Read_Cols} += length($emit);
}
}
sub parse_done {
my $self = shift;
if ($self->{_Action} eq 'F') {
$self->crknum("Error-1400: unclosed token");
}
if ($self->{_Action} eq 'G') {
$self->crknum("Error-1410: syntax error");
}
if (@{$self->{_Stack}}) {
$self->crknum("Error-1420: no element found");
}
unless ($self->{_Scount} == 0) {
$self->crknum("Error-1430: Internal Error - no element found");
}
unless ($self->{_Text} eq '') {
$self->crknum("Error-1440: unclosed token");
}
$self->_emit_Final;
# $self->release; # nothing needs to be released, everything is reference counted
}
sub release { # dummy subroutine, nothing needs to be released, everything is reference counted
}
sub crknum {
my $self = shift;
my $pos = 'at line '.$self->{_Read_Lines}.', column '.$self->{_Read_Cols}.', byte '.$self->{_Read_Bytes};
croak($_[0].' '.$pos);
}
1;
__END__
=head1 NAME
XML::Parsepp - Simplified pure perl parser for XML
=head1 SYNOPSIS
use XML::Parsepp;
$p1 = new XML::Parsepp;
$p1->parsefile('REC-xml-19980210.xml');
$p1->parse('Hello World');
# Alternative
$p2 = new XML::Parsepp(Handlers => {Start => \&handle_start,
End => \&handle_end,
Char => \&handle_char});
$p2->parse($socket);
# Another alternative
$p3 = new XML::Parsepp;
$p3->setHandlers(Char => \&text,
Default => \&other);
open(FOO, 'xmlgenerator |');
$p3->parse(*FOO);
close(FOO);
$p3->parsefile('junk.xml');
Allow duplicate attributes with option: dupatt => ';'
The concatenation string XML::Parsepp->new(dupatt => $str) is
restricted to printable ascii excluding " and '
$p1 = new XML::Parsepp(dupatt => ';');
$p1->parse('Hello World');
This will fire the Start event with the following parameters
start($ExpatNB, 'foo', 'id', 'me;too');
=head1 DESCRIPTION
This module provides a pure Perl implementation to parse XML documents. Its interface is very
close to that of XML::Parser (in fact, the synopsis has, with some minor modifications, been copied
from XML::Parser).
=head1 USAGE
XML::Parsepp can be used as a pure Perl alternative to XML::Parser. The main use case is with XML::Reader
where it can be used as a drop-in replacement. Here is a sample:
use XML::Reader qw(XML::Parsepp);
my $text = q{n tm r};
my $rdr = XML::Reader->new(\$text) or die "Error: $!";
while ($rdr->iterate) {
printf "Path: %-19s, Value: %s\n", $rdr->path, $rdr->value;
}
=head1 AUTHOR
Klaus Eichner
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2009-2011 by Klaus Eichner
All rights reserved. This program is free software; you can redistribute
it and/or modify it under the terms of the artistic license 2.0,
see http://www.opensource.org/licenses/artistic-license-2.0.php
=cut