001    package org.maltparser.core.io.dataformat;
002    
003    import java.net.URL;
004    import java.util.HashSet;
005    import java.util.SortedMap;
006    import java.util.TreeMap;
007    
008    import javax.xml.parsers.DocumentBuilder;
009    import javax.xml.parsers.DocumentBuilderFactory;
010    import javax.xml.parsers.ParserConfigurationException;
011    
012    import org.maltparser.core.exception.MaltChainedException;
013    import org.maltparser.core.helper.SystemLogger;
014    import org.maltparser.core.helper.Util;
015    import org.maltparser.core.symbol.SymbolTableHandler;
016    import org.w3c.dom.Element;
017    import org.w3c.dom.NodeList;
018    import org.xml.sax.SAXException;
019    
020    /**
021     *  
022     *
023     * @author Johan Hall
024     * @since 1.0
025    **/
026    public class DataFormatSpecification {  
027            public enum DataStructure {
028                    DEPENDENCY,  // Dependency structure
029                    PHRASE, // Phrase structure
030            };
031            private int entryPositionCounter;
032            private String dataFormatName;
033            private DataStructure dataStructure;
034            private final SortedMap<String, DataFormatEntry> entries;
035            private final HashSet<Dependency> dependencies;
036    //      private final HashSet<SyntaxGraphReader> supportedReaders;
037    //      private final HashSet<SyntaxGraphWriter> supportedWriters;
038            
039            public DataFormatSpecification() {
040                    entries = new TreeMap<String, DataFormatEntry>();
041                    entryPositionCounter = 0;
042                    dependencies = new HashSet<Dependency>();
043    //              supportedReaders = new HashSet<SyntaxGraphReader>();
044    //              supportedWriters = new HashSet<SyntaxGraphWriter>();
045            }
046            
047            public DataFormatInstance createDataFormatInstance(SymbolTableHandler symbolTables, String nullValueStrategy, String rootLabel) throws MaltChainedException {
048                    return new DataFormatInstance(entries, symbolTables, nullValueStrategy, rootLabel, this);
049    
050            }
051            
052            public void parseDataFormatXMLfile(String fileName) throws MaltChainedException {
053                    URL url = Util.findURL(fileName);
054                    if (url == null) {
055                            throw new DataFormatException("The data format specifcation file '"+fileName+"'cannot be found. ");
056                    }
057                    parseDataFormatXMLfile(url);
058            }
059            
060            public HashSet<Dependency> getDependencies() {
061                    return dependencies;
062            }
063            
064            public void parseDataFormatXMLfile(URL url) throws MaltChainedException {
065                    if (url == null) {
066                            throw new DataFormatException("The data format specifcation file cannot be found. ");
067                    }
068                    
069                    if (SystemLogger.logger().isInfoEnabled()) {
070                            int index = url.toString().indexOf('!');
071                            if (index == -1) {
072                                    SystemLogger.logger().debug("  Data Format          : "+url.toString()+"\n");
073                            } else {
074                                    SystemLogger.logger().debug("  Data Format          : "+url.toString().substring(index+1)+"\n");
075                            }
076                    }
077                    
078            try {
079                DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
080                DocumentBuilder db = dbf.newDocumentBuilder();
081    
082                    Element root = db.parse(url.openStream()).getDocumentElement();
083                    if (root.getNodeName().equals("dataformat")) { 
084                            dataFormatName = root.getAttribute("name");
085                            if (root.getAttribute("datastructure").length() > 0) {
086                                    dataStructure = DataStructure.valueOf(root.getAttribute("datastructure").toUpperCase());
087                            } else {
088                                    dataStructure = DataStructure.DEPENDENCY;
089                            }
090                    } else {
091                            throw new DataFormatException("Data format specification file must contain one 'dataformat' element. ");
092                    }
093                    NodeList cols = root.getElementsByTagName("column");
094                Element col = null;
095                for (int i = 0, n = cols.getLength(); i < n; i++) {
096                    col = (Element)cols.item(i);
097                    DataFormatEntry entry = new DataFormatEntry(i, col.getAttribute("name"), col.getAttribute("category"),col.getAttribute("type"), col.getAttribute("default"));
098                    entries.put(entry.getDataFormatEntryName(), entry);
099                }
100                NodeList deps = root.getElementsByTagName("dependencies");
101                if (deps.getLength() > 0) {
102                    NodeList dep = ((Element)deps.item(0)).getElementsByTagName("dependency");
103                    for (int i = 0, n = dep.getLength(); i < n; i++) {
104                            Element e = (Element)dep.item(i);
105                            dependencies.add(new Dependency(e.getAttribute("name"), e.getAttribute("url"), e.getAttribute("map"), e.getAttribute("urlmap")));
106                    }
107                }
108            } catch (java.io.IOException e) {
109                    throw new DataFormatException("Cannot find the file "+url.toString()+". ", e);
110            } catch (ParserConfigurationException e) {
111                    throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
112            } catch (SAXException e) {
113                    throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
114            }
115            }
116            
117            public void addEntry(String dataFormatEntryName, String category, String type, String defaultOutput) {
118                    DataFormatEntry entry = new DataFormatEntry(entryPositionCounter++, dataFormatEntryName, category, type, defaultOutput);
119                    entries.put(entry.getDataFormatEntryName(), entry);
120            }
121            
122            public DataFormatEntry getEntry(String dataFormatEntryName) {
123                    return entries.get(dataFormatEntryName);
124            }
125    
126            public String getDataFormatName() {
127                    return dataFormatName;
128            }
129    
130            public DataStructure getDataStructure() {
131                    return dataStructure;
132            }
133    
134            public String toString() {
135                    final StringBuilder sb = new StringBuilder();
136                    sb.append("Data format specification: ");
137                    sb.append(dataFormatName);
138                    sb.append('\n');
139                    for (DataFormatEntry dfe : entries.values()) {
140                            sb.append(dfe);
141                            sb.append('\n');
142                    }
143                    return sb.toString();
144            }
145            
146            public class Dependency {
147                    protected String dependentOn;
148                    protected String urlString;
149                    protected String map;
150                    protected String mapUrl;
151                    
152                    public Dependency(String dependentOn, String urlString, String map, String mapUrl) {
153                            setDependentOn(dependentOn);
154                            setUrlString(urlString);
155                            setMap(map);
156                            setMapUrl(mapUrl);
157                    }
158                    
159                    public String getDependentOn() {
160                            return dependentOn;
161                    }
162                    protected void setDependentOn(String dependentOn) {
163                            this.dependentOn = dependentOn;
164                    }
165                    
166                    public String getUrlString() {
167                            return urlString;
168                    }
169    
170                    public void setUrlString(String urlString) {
171                            this.urlString = urlString;
172                    }
173    
174                    public String getMap() {
175                            return map;
176                    }
177                    protected void setMap(String map) {
178                            this.map = map;
179                    }
180    
181                    public String getMapUrl() {
182                            return mapUrl;
183                    }
184    
185                    public void setMapUrl(String mapUrl) {
186                            this.mapUrl = mapUrl;
187                    }
188            }
189    }