/* * @(#)XMLInputStreamImpl.java * * Copyright 2004, Francois PERRAD * See below for extracts from AElfred XML Parser */ import java.io.*; import java.net.URL; import java.util.Hashtable; import java.util.Stack; import org.xml.sax.*; import org.xml.sax.helpers.*; import org.omg.CORBA.Any; import org.omg.CORBA.TypeCode; public class XMLInputStreamImpl extends org.omg.CORBA.portable.XMLInputStream { //Fields private org.xml.sax.ErrorHandler errorHandler; private BufferedReader reader; // current reader private boolean emptyElement; //Constructors public XMLInputStreamImpl (InputStream _is, org.xml.sax.ErrorHandler _errorHandler) { errorHandler = _errorHandler; try { Reader r = new InputStreamReader (_is, "UTF-8"); reader = new BufferedReader (r); } catch (java.io.UnsupportedEncodingException e) { } _init(); } public XMLInputStreamImpl (InputStream _is) { this (_is, new org.xml.sax.helpers.DefaultHandler ()); } public XMLInputStreamImpl (org.xml.sax.InputSource source, org.xml.sax.ErrorHandler _errorHandler) { errorHandler = _errorHandler; reader = _getReaderFromSource (source); _init (); } public XMLInputStreamImpl (org.xml.sax.InputSource source) { this (source, new org.xml.sax.helpers.DefaultHandler ()); } private BufferedReader _getReaderFromSource (org.xml.sax.InputSource source) { Reader r = source.getCharacterStream (); if (r == null) { InputStream is = source.getByteStream (); if (is == null) { String uri = source.getSystemId (); if (uri == null) { return null; } try { URL url = new URL (uri); is = url.openStream (); } catch (IOException ex) { return null; } } try { r = new InputStreamReader (is, "UTF-8"); } catch (java.io.UnsupportedEncodingException e) { } } return new BufferedReader (r); } private void _init () { initializeVariables (); // predeclare the built-in entities here (replacement texts) // we don't need to intern(), since we're guaranteed literals // are always (globally) interned. 
setInternalEntity ("amp", "&"); setInternalEntity ("lt", "<"); setInternalEntity ("gt", ">"); setInternalEntity ("apos", "'"); setInternalEntity ("quot", """); try { pushURL (); parseMisc (); } catch (IOException ee) { throw new org.omg.CORBA.MARSHAL (ee.toString ()); } catch (org.xml.sax.SAXException se) { throw new org.omg.CORBA.MARSHAL (se.toString ()); } } //Methods public void setErrorHandler(org.xml.sax.ErrorHandler handler) { errorHandler = handler; } public org.xml.sax.ErrorHandler getErrorHandler() { return errorHandler; } private void warning (String msg) { SAXParseException se = new SAXParseException (msg, null, null, line, column); try { errorHandler.warning (se); } catch (org.xml.sax.SAXException e) { } } private void error (String msg) { SAXParseException se = new SAXParseException (msg, null, null, line, column); try { errorHandler.error (se); } catch (org.xml.sax.SAXException e) { } } private void fatalError (String msg) { SAXParseException se = new SAXParseException (msg, null, null, line, column); try { errorHandler.fatalError (se); } catch (org.xml.sax.SAXException e) { } } public void close () throws IOException { reader.close (); cleanupVariables (); super.close (); } public boolean read_boolean (java.lang.String tag) { read_open_tag (tag); String str = read_pcdata(); read_close_tag (tag); if (str.equals ("true")) { return true; } if (!str.equals ("false")) { throw new org.omg.CORBA.MARSHAL ("Bad value for 'boolean'"); } return false; } public char read_char (java.lang.String tag) { read_open_tag (tag); String str = read_pcdata(); read_close_tag (tag); if (str.length () != 1) { throw new org.omg.CORBA.MARSHAL ("Bad length for 'char'"); } return str.charAt (0); } public char read_wchar (java.lang.String tag) { read_open_tag (tag); String str = read_pcdata(); read_close_tag (tag); if (str.length () != 1) { throw new org.omg.CORBA.MARSHAL ("Bad length for 'char'"); } return str.charAt (0); } public byte read_octet (java.lang.String tag) { 
read_open_tag (tag); String str = read_pcdata(); read_close_tag (tag); try { int val = Integer.parseInt (str.trim ()); if (val < Byte.MIN_VALUE && val > Byte.MAX_VALUE) { throw new org.omg.CORBA.MARSHAL ("Out of range for 'octet'"); } return (byte)val; } catch (NumberFormatException ex) { throw new org.omg.CORBA.MARSHAL (ex.getMessage ()); } } public short read_short (java.lang.String tag) { read_open_tag (tag); String str = read_pcdata(); read_close_tag (tag); try { int val = Integer.parseInt (str.trim ()); if (val < Short.MIN_VALUE && val > Short.MAX_VALUE) { throw new org.omg.CORBA.MARSHAL ("Out of range for 'short'"); } return (short)val; } catch (NumberFormatException ex) { throw new org.omg.CORBA.MARSHAL (ex.getMessage ()); } } public short read_ushort (java.lang.String tag) { return read_short (tag); } public int read_long (java.lang.String tag) { read_open_tag (tag); String str = read_pcdata(); read_close_tag (tag); try { int val = Integer.parseInt (str.trim ()); return val; } catch (NumberFormatException ex) { throw new org.omg.CORBA.MARSHAL (ex.getMessage ()); } } public int read_ulong (java.lang.String tag) { return read_long (tag); } public long read_longlong (java.lang.String tag) { read_open_tag (tag); String str = read_pcdata(); read_close_tag (tag); try { long val = Long.parseLong (str.trim ()); return val; } catch (NumberFormatException ex) { throw new org.omg.CORBA.MARSHAL (ex.getMessage ()); } } public long read_ulonglong (java.lang.String tag) { return read_longlong (tag); } public float read_float (java.lang.String tag) { read_open_tag (tag); String str = read_pcdata(); read_close_tag (tag); try { float val = Float.parseFloat (str.trim ()); return val; } catch (NumberFormatException ex) { throw new org.omg.CORBA.MARSHAL (ex.getMessage ()); } } public double read_double (java.lang.String tag) { read_open_tag (tag); String str = read_pcdata(); read_close_tag (tag); try { double val = Double.parseDouble (str.trim ()); return val; } catch 
(NumberFormatException ex) { throw new org.omg.CORBA.MARSHAL (ex.getMessage ()); } } public java.lang.String read_string (java.lang.String tag) { read_open_tag (tag); String str = read_pcdata(); read_close_tag (tag); return str; } public java.lang.String read_wstring (java.lang.String tag) { read_open_tag (tag); String str = read_pcdata(); read_close_tag (tag); return str; } public java.math.BigDecimal read_fixed (java.lang.String tag) { read_open_tag (tag); String str = read_pcdata(); read_close_tag (tag); try { return new java.math.BigDecimal (str.trim ()); } catch (NumberFormatException ex) { throw new org.omg.CORBA.MARSHAL (ex.getMessage ()); } } public org.omg.CORBA.Object read_Object (java.lang.String tag) { throw new org.omg.CORBA.NO_IMPLEMENT (); } public org.omg.CORBA.TypeCode read_TypeCode (java.lang.String tag) { throw new org.omg.CORBA.NO_IMPLEMENT (); } public org.omg.CORBA.Any read_any (java.lang.String tag) { throw new org.omg.CORBA.NO_IMPLEMENT (); } /** * Parse an element, with its tags. *
* [39] element ::= EmptyElementTag | STag content ETag
* [40] STag ::= '<' Name (S Attribute)* S? '>'
* [44] EmptyElementTag ::= '<' Name (S Attribute)* S? '/>'
*
* (The '<' has not been read yet: this method consumes it itself.) *
NOTE: this method actually chains onto parseContent (), if necessary, * and parseContent () will take care of calling parseETag (). */ public void read_open_tag (java.lang.String tag) { String gi; char c; emptyElement = false; try { while (true) { require ('<', "open_tag"); // Read the element type name. gi = readNmtoken (true); // Read the attributes, if any. // After this loop, "c" is the closing delimiter. boolean white = tryWhitespace (); c = readCh (); while (c != '/' && c != '>') { unread (c); if (!white) error ("need whitespace between attributes"); parseAttribute (gi); white = tryWhitespace (); c = readCh (); } // Figure out if this is a start tag // or an empty element, and dispatch an // event accordingly. switch (c) { case '>': if (! gi.equals(tag)) { currentElement = gi; parseContent (); } break; case '/': require ('>', "empty element tag"); if (gi.equals(tag)) emptyElement = true; break; } if (gi.equals(tag)) break; } } catch (org.xml.sax.SAXException se) { throw new org.omg.CORBA.MARSHAL (se.toString ()); } catch (IOException e) { throw new org.omg.CORBA.MARSHAL (e.toString ()); } return; } /** * Parse an end tag. *
* [42] ETag ::= '</' Name S? '>'
*
*/
public void read_close_tag (java.lang.String tag)
{
    // Nothing to consume when the element was opened as an
    // empty-element tag (the flag is set by read_open_tag).
    if (!emptyElement) {
        try {
            // Element name, optional whitespace, then the closing '>'.
            // NOTE(review): the leading "</" is not read here; presumably
            // it has already been consumed (e.g. by read_pcdata) - confirm.
            require (tag, "element end tag");
            skipWhitespace ();
            require ('>', "name in end tag");
        } catch (IOException ioFailure) {
            throw new org.omg.CORBA.MARSHAL (ioFailure.toString ());
        } catch (org.xml.sax.SAXException parseFailure) {
            throw new org.omg.CORBA.MARSHAL (parseFailure.toString ());
        }
    }
}
/**
* Parse the content of an element.
*
* [43] content ::= (element | CharData | Reference
* | CDSect | PI | Comment)*
* [67] Reference ::= EntityRef | CharRef
*
*/
public java.lang.String read_pcdata ()
{
if (emptyElement) {
return "";
}
StringBuffer data = new StringBuffer();
boolean ETag = false;
char c;
try {
while (!ETag) {
parseCharData();
// Handle delimiters
c = readCh ();
switch (c) {
case '&': // Found "&"
c = readCh ();
if (c == '#') {
parseCharRef ();
} else {
unread (c);
parseEntityRef (true);
}
break;
case '<': // Found "<"
data.append (dataBuffer, 0, dataBufferPos);
dataBufferPos = 0;
c = readCh ();
switch (c) {
case '!': // Found "
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* "-->"
*
* (The <!-- has already been read.)
*/
private void parseComment ()
throws SAXException, IOException
{
    // Scan up to the first "--"; it must be immediately followed by '>'
    // to close the comment.
    parseUntil ("--");
    require ('>', "-- in comment");
    // Reset the data buffer position so the comment does not end up in
    // the character data (presumably parseUntil accumulates the scanned
    // text in dataBuffer - confirm).
    dataBufferPos = 0;
}
/**
* Parse a processing instruction and discard it (no call-back is made).
*
* [16] PI ::= '<?' PITarget
* (S (Char* - (Char* '?>' Char*)))?
* '?>'
* [17] PITarget ::= Name - ( ('X'|'x') ('M'|'m') ('L'|'l') )
*
* (The <? has already been read.)
*/
private void parsePI ()
throws SAXException, IOException
{
    // The target name "xml" (in any letter case) is not allowed.
    final String target = readNmtoken (true);
    if ("xml".equalsIgnoreCase (target)) {
        error ("Illegal processing instruction target", target, null);
    }

    // Either the instruction ends right after the target, or whitespace
    // and arbitrary text follow up to the closing "?>".
    final boolean endsAfterTarget = tryRead ("?>");
    if (!endsAfterTarget) {
        requireWhitespace ();
        parseUntil ("?>");
    }

    // Reset the data buffer position: the instruction is not kept.
    dataBufferPos = 0;
}
/**
* Parse a CDATA section.
*
* [18] CDSect ::= CDStart CData CDEnd
* [19] CDStart ::= '<![CDATA['
* [20] CData ::= (Char* - (Char* ']]>' Char*))
* [21] CDEnd ::= ']]>'
*
* (The '<![CDATA[' has already been read.)
*/
private void parseCDSect ()
throws SAXException, IOException
{
    // Scan forward to the "]]>" terminator. Unlike parseComment and
    // parsePI, dataBufferPos is not reset afterwards.
    parseUntil ("]]>");
}

/**
* Parse the XML declaration.
*
* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
* [24] VersionInfo ::= S 'version' Eq
* ("'" VersionNum "'" | '"' VersionNum '"' )
* [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
* [32] SDDecl ::= S 'standalone' Eq
* ( "'" ('yes' | 'no') "'" | '"' ('yes' | 'no') '"' )
* [80] EncodingDecl ::= S 'encoding' Eq
* ( '"' EncName '"' | "'" EncName "'" )
* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
*
* (The <?xml and whitespace have already been read.)
* @return the encoding in the declaration, uppercased; or null
* @see #parseTextDecl
* @see #setupDecoding
*/
private String parseXMLDecl (boolean ignoreEncoding)
throws SAXException, IOException
{
    // NOTE: the ignoreEncoding argument is not consulted in this body.

    // Flags for every literal read below (judging by their names, they
    // switch off character, parameter-entity and entity reference
    // expansion).
    final int literalFlags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
    String declaredEncoding = null;

    // Mandatory version; anything other than "1.0" is reported.
    require ("version", "XML declaration");
    parseEq ();
    final String versionNum = readLiteral (literalFlags);
    if (!versionNum.equals ("1.0")) {
        error ("unsupported XML version", versionNum, "1.0");
    }

    // Optional encoding declaration, which must be preceded by whitespace.
    boolean sawSpace = tryWhitespace ();
    if (tryRead ("encoding")) {
        if (!sawSpace) {
            error ("whitespace required before 'encoding='");
        }
        parseEq ();
        declaredEncoding = readLiteral (literalFlags);
    }

    // Optional standalone declaration. Whitespace is probed again only
    // when an encoding was read; otherwise the earlier result still holds.
    if (declaredEncoding != null) {
        sawSpace = tryWhitespace ();
    }
    if (tryRead ("standalone")) {
        if (!sawSpace) {
            error ("whitespace required before 'standalone='");
        }
        parseEq ();
        final String standaloneValue = readLiteral (literalFlags);
        final boolean recognised =
            "yes".equals (standaloneValue) || "no".equals (standaloneValue);
        if (!recognised) {
            error ("standalone flag must be 'yes' or 'no'");
        }
    }

    // Closing delimiter.
    skipWhitespace ();
    require ("?>", "XML declaration");

    return declaredEncoding;
}
/**
* Parse miscellaneous markup outside the document element and DOCTYPE
* declaration.
*
* [27] Misc ::= Comment | PI | S
*
*/
private void parseMisc ()
throws SAXException, IOException
{
while (true) {
skipWhitespace ();
if (tryRead ("")) {
parsePI ();
} else if (tryRead ("