001    package org.maltparser.core.syntaxgraph.reader;
002    
003    import java.text.SimpleDateFormat;
004    import java.util.Date;
005    import java.util.LinkedHashMap;
006    import java.util.SortedMap;
007    import java.util.TreeMap;
008    
009    import org.maltparser.core.helper.Util;
010    import org.maltparser.core.symbol.SymbolTable;
011    import org.maltparser.core.symbol.SymbolTableHandler;
012    /**
013    *
014    *
015    * @author Johan Hall
016    */
017    public class TigerXMLHeader {
018            public enum Domain {
019                    T, // feature for terminal nodes 
020                    NT, // feature for nonterminal nodes 
021                    FREC, //feature for both
022                    EL, // edge label (same as "edgelabel" in TigerXML schema)
023                    SEL // secondary edge Label (same as "secedgelabel" in TigerXML schema)
024            };
025    
026            private String corpusID;
027            private String corpusVersion;
028            private String external;
029            private String metaName;
030            private String metaAuthor;
031            private String metaDescription;
032            private String metaInDate;
033            private String metaFormat;
034            private String metaHistory;
035            private SymbolTableHandler symbolTableHandler;
036            private FeatureEdgeLabel edgeLabels;
037            private FeatureEdgeLabel secEdgeLabels;
038            private LinkedHashMap<String,FeatureEdgeLabel> features;
039            
040            public TigerXMLHeader(SymbolTableHandler symbolTableHandler) { 
041                    setSymbolTableHandler(symbolTableHandler);
042                    features = new LinkedHashMap<String,FeatureEdgeLabel>();
043            }
044    
045            public boolean isTigerXMLWritable() {
046                    return true;
047                    //return features.size() > 0;
048            }
049            
050            public void addFeature(String featureName, String domainName) {
051                    if (!features.containsKey(featureName)) {
052                            features.put(featureName, new FeatureEdgeLabel(featureName, domainName));
053                    } 
054            }
055            
056            public void addFeatureValue(String featureName, String name) {
057                    addFeatureValue(featureName, name, "\t");
058            }
059            
060            public void addFeatureValue(String featureName, String name, String desc) {
061                    if (features.containsKey(featureName)) {
062                            if (desc == null || desc.length() == 0) {
063                                    features.get(featureName).addValue(name, "\t");
064                            } else {
065                                    features.get(featureName).addValue(name, desc);
066                            }
067                    } 
068            }
069            
070            public void addEdgeLabelValue(String name) {
071                    addEdgeLabelValue(name, "\t");
072            }
073            
074            public void addEdgeLabelValue(String name, String desc) {
075                    if (edgeLabels == null) {
076                            edgeLabels = new FeatureEdgeLabel("edgelabel", Domain.EL);
077                    }
078                    if (desc == null || desc.length() == 0) {
079                            edgeLabels.addValue(name, "\t");
080                    } else {
081                            edgeLabels.addValue(name, desc);
082                    }
083            }
084            
085            public void addSecEdgeLabelValue(String name) {
086                    addSecEdgeLabelValue(name, "\t");
087            }
088            
089            public void addSecEdgeLabelValue(String name, String desc) {
090                    if (secEdgeLabels == null) {
091                            secEdgeLabels = new FeatureEdgeLabel("secedgelabel", Domain.SEL);
092                    }
093                    if (desc == null || desc.length() == 0) {
094                            secEdgeLabels.addValue(name, "\t");
095                    } else {
096                            secEdgeLabels.addValue(name, desc);
097                    }
098            }
099            
100            public String getCorpusID() {
101                    return corpusID;
102            }
103    
104            public void setCorpusID(String corpusID) {
105                    this.corpusID = corpusID;
106            }
107    
108            public String getCorpusVersion() {
109                    return corpusVersion;
110            }
111    
112            public void setCorpusVersion(String corpusVersion) {
113                    this.corpusVersion = corpusVersion;
114            }
115    
116            public void setExternal(String external) {
117                    this.external = external;
118            }
119            
120            public String getExternal() {
121                    return external;
122            }
123            
124            public void setMeta(String metaElement, String value) {
125                    if (metaElement.equals("name"))                 { setMetaName(value); }
126                    if (metaElement.equals("author"))               { setMetaAuthor(value); }
127                    if (metaElement.equals("description"))  { setMetaDescription(value); }
128                    if (metaElement.equals("date"))                 { setMetaInDate(value); }
129                    if (metaElement.equals("format"))               { setMetaFormat(value); }
130                    if (metaElement.equals("history"))              { setMetaHistory(value); }
131            }
132    
133            public String getMetaName() {
134                    return metaName;
135            }
136    
137            public void setMetaName(String metaName) {
138                    this.metaName = metaName;
139            }
140    
141            public String getMetaAuthor() {
142                    return metaAuthor;
143            }
144            
145            public void setMetaAuthor(String metaAuthor) {
146                    this.metaAuthor = metaAuthor;
147            }
148    
149            public String getMetaDescription() {
150                    return metaDescription;
151            }
152            
153            public void setMetaDescription(String metaDescription) {
154                    this.metaDescription = metaDescription;
155            }
156            
157            public String getMetaInDate() {
158                    return metaInDate;
159            }
160    
161            public String getMetaCurrentDate() {
162                    return getMetaCurrentDate("yyyy-MM-dd HH:mm:ss"); 
163            }
164            
165            public String getMetaCurrentDate(String format) {
166                    return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
167            }
168            
169            public void setMetaInDate(String metaInDate) {
170                    this.metaInDate = metaInDate;
171            }
172    
173            public String getMetaFormat() {
174                    return metaFormat;
175            }
176    
177            public void setMetaFormat(String metaFormat) {
178                    this.metaFormat = metaFormat;
179            }
180    
181            public String getMetaHistory() {
182                    return metaHistory;
183            }
184    
185            public void setMetaHistory(String metaHistory) {
186                    this.metaHistory = metaHistory;
187            }
188            
189            public SymbolTableHandler getSymbolTableHandler() {
190                    return symbolTableHandler;
191            }
192    
193            protected void setSymbolTableHandler(SymbolTableHandler symbolTableHandler) {
194                    this.symbolTableHandler = symbolTableHandler;
195            }
196    
197            public String toTigerXML() {
198                    final StringBuilder sb = new StringBuilder();
199                    
200                    if (getCorpusVersion() == null) {
201                            sb.append("<corpus id=\"");
202                            sb.append(((getCorpusID() == null)?"GeneratedByMaltParser":getCorpusID()));
203                            sb.append("\">\n");
204                    } else {
205                            sb.append("<corpus id=\"");
206                            sb.append(((getCorpusID() == null)?"GeneratedByMaltParser":getCorpusID()));
207                            sb.append("\" version=\"");
208                            sb.append(getCorpusVersion());
209                            sb.append("\">\n");
210                    }
211                    sb.append("  <head>\n");
212                    sb.append("    <meta>\n");
213                    sb.append("      <name>");
214                    sb.append(((getMetaName() == null)?"GeneratedByMaltParser":Util.xmlEscape(getMetaName())));
215                    sb.append("</name>\n");
216                    sb.append("      <author>MaltParser</author>\n");
217                    sb.append("      <date>");
218                    sb.append(getMetaCurrentDate());
219                    sb.append("</date>\n");
220                    
221                    sb.append("      <description>");
222                    sb.append(Util.xmlEscape("Unfortunately, you have to add the annotations header data yourself. Maybe in later releases this will be fixed. "));
223                    sb.append("</description>\n");
224                    
225    //              if (getMetaDescription() != null) {
226    //                      sb.append("      <description>");
227    //                      sb.append(Util.xmlEscape(getMetaDescription()));
228    //                      sb.append("</description>\n");
229    //              }
230    //              if (getMetaFormat() != null) {
231    //                      sb.append("      <format>");
232    //                      sb.append(Util.xmlEscape(getMetaFormat()));
233    //                      sb.append("</format>\n");
234    //              }
235    //              if (getMetaHistory() != null) {
236    //                      sb.append("      <history>");
237    //                      sb.append(Util.xmlEscape(getMetaHistory()));
238    //                      sb.append("</history>\n");
239    //              }
240                    sb.append("    </meta>\n");
241                    sb.append("    <annotation/>\n");
242    //              sb.append("    <annotation>\n");
243    //              for (String name : features.keySet()) {
244    //                      sb.append(features.get(name).toTigerXML());
245    //              }
246    //              if (edgeLabels != null) {
247    //                      sb.append(edgeLabels.toTigerXML());
248    //              }
249    //              if (secEdgeLabels != null) {
250    //                      sb.append(secEdgeLabels.toTigerXML());
251    //              }
252    //              sb.append("    </annotation>\n");
253                    sb.append("  </head>\n");
254                    sb.append("  <body>\n");
255                    return sb.toString();
256            }
257            
258            public String toString() {
259                    return toTigerXML();
260            }
261            
262            protected class FeatureEdgeLabel {
263                    private String name;
264                    private Domain domain;
265                    // values: key mapped to \t (tab) indicates that the description part is missing
266                    private SortedMap<String, String> values; 
267                    private SymbolTable table;
268                    
269                    public FeatureEdgeLabel(String name, String domainName) { 
270                            setName(name);
271                            setDomain(domainName);
272                    }
273    
274                    public FeatureEdgeLabel(String name, Domain domain) { 
275                            setName(name);
276                            setDomain(domain);
277                    }
278                    
279                    public String getName() {
280                            return name;
281                    }
282    
283                    public void setName(String name) {
284                            this.name = name;
285                    }
286                    
287                    public void setDomain(String domainName) {
288                            domain = Domain.valueOf(domainName);
289                    }
290                    
291                    public void setDomain(Domain domain) {
292                            this.domain = domain;
293                    }
294                    
295                    public String getDomainName() {
296                            return domain.toString();
297                    }
298                    
299                    public Domain getDomain() {
300                            return domain;
301                    }
302                    
303                    public SymbolTable getTable() {
304                            return table;
305                    }
306    
307                    public void setTable(SymbolTable table) {
308                            this.table = table;
309                    }
310    
311                    public void addValue(String name) {
312                            addValue(name, "\t");
313                    }
314                    
315                    public void addValue(String name, String desc) {
316                            if (values == null) {
317                                    values = new TreeMap<String,String>();
318                            }
319                            values.put(name, desc);
320                    }
321                    
322                    public String toTigerXML() {
323                            final StringBuilder sb = new StringBuilder();
324                            if (domain == Domain.T || domain == Domain.FREC || domain == Domain.NT) {
325                                    sb.append("      <feature domain=\"");
326                                    sb.append(getDomainName());
327                                    sb.append("\" name=\"");
328                                    sb.append(getName());
329                                    sb.append((values == null)?"\" />\n":"\">\n");
330                            }
331                            if (domain == Domain.EL) {
332                                    sb.append((values != null)?"      <edgelabel>\n":"      <edgelabel />\n");
333                            }
334                            if (domain == Domain.SEL) {
335                                    sb.append((values != null)?"      <secedgelabel>\n":"      <secedgelabel />\n");
336                            }
337                            if (values != null) {
338                                    for (String name : values.keySet()) {
339                                            sb.append("        <value name=\"");
340                                            sb.append(name);
341                                            if (values.get(name).equals("\t")) {
342                                                    sb.append("\" />\n");
343                                            } else {
344                                                    sb.append("\">");
345                                                    sb.append(Util.xmlEscape(values.get(name)));
346                                                    sb.append("</value>\n");
347                                            }
348                                    }
349                            }
350                            if (domain == Domain.T || domain == Domain.FREC || domain == Domain.NT) {
351                                    if (values != null) {
352                                            sb.append("      </feature>\n");
353                                    }
354                            }
355                            if (domain == Domain.EL && values != null) {
356                                    sb.append("      </edgelabel>\n");
357                            }
358                            if (domain == Domain.SEL && values != null) {
359                                    sb.append("      </secedgelabel>\n");
360                            }
361                            return sb.toString();
362                    }
363                    
364                    public String toString() {
365                            return toTigerXML();
366                    }
367            }
368    }       
369    
370      
371