001    package org.maltparser.core.syntaxgraph.writer;
002    
003    
004    import java.util.HashMap;
005    
006    import org.maltparser.core.config.ConfigurationDir;
007    import org.maltparser.core.exception.MaltChainedException;
008    import org.maltparser.core.flow.FlowChartInstance;
009    import org.maltparser.core.flow.item.ChartItem;
010    import org.maltparser.core.flow.spec.ChartItemSpecification;
011    import org.maltparser.core.io.dataformat.DataFormatException;
012    import org.maltparser.core.io.dataformat.DataFormatInstance;
013    import org.maltparser.core.io.dataformat.DataFormatManager;
014    import org.maltparser.core.options.OptionManager;
015    import org.maltparser.core.symbol.SymbolTableHandler;
016    import org.maltparser.core.syntaxgraph.TokenStructure;
017    
018    /**
019    *
020    *
021    * @author Johan Hall
022    */
023    public class WriteChartItem extends ChartItem {
024            private String idName;
025            private String outputFormatName;
026            private String outputFileName;
027            private String outputCharSet;
028            private String writerOptions;
029            private Class<? extends SyntaxGraphWriter> graphWriterClass;
030            
031            private String nullValueStrategy;
032            private String rootLabels;
033            
034            private SyntaxGraphWriter writer;
035            private String sourceName;
036            private String optiongroupName;
037            private DataFormatInstance outputDataFormatInstance;
038            private TokenStructure cachedGraph = null;
039            
040            public WriteChartItem() { super(); }
041            
042            public void initialize(FlowChartInstance flowChartinstance, ChartItemSpecification chartItemSpecification) throws MaltChainedException {
043                    super.initialize(flowChartinstance, chartItemSpecification);
044                    
045                    for (String key : chartItemSpecification.getChartItemAttributes().keySet()) {
046                            if (key.equals("id")) {
047                                    idName = chartItemSpecification.getChartItemAttributes().get(key);
048                            } else if (key.equals("source")) {
049                                    sourceName = chartItemSpecification.getChartItemAttributes().get(key);
050                            } else if (key.equals("optiongroup")) {
051                                    optiongroupName = chartItemSpecification.getChartItemAttributes().get(key);
052                            }
053                    }
054                    
055                    if (idName == null) {
056                            idName = getChartElement("write").getAttributes().get("id").getDefaultValue();
057                    } else if (sourceName == null) {
058                            sourceName = getChartElement("write").getAttributes().get("source").getDefaultValue();
059                    } else if (optiongroupName == null) {
060                            optiongroupName = getChartElement("write").getAttributes().get("optiongroup").getDefaultValue();
061                    }
062                    
063                    setOutputFormatName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "format").toString());
064                    setOutputFileName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "outfile").toString());
065                    setOutputCharSet(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "charset").toString());
066                    setWriterOptions(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "writer_options").toString());
067                    setSyntaxGraphWriterClass((Class<?>)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "writer"));
068    
069                    setNullValueStrategy(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "singlemalt", "null_value").toString());
070                    setRootLabels(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "graph", "root_label").toString());
071    
072                    initOutput(getNullValueStrategy(), getRootLabels());
073                    initWriter(getSyntaxGraphWriterClass(), getOutputFileName(), getOutputCharSet(), getWriterOptions());
074            }
075            
076            public int preprocess(int signal) throws MaltChainedException {
077                    return signal;
078            }
079            
080            public int process(int signal) throws MaltChainedException {
081                    if (cachedGraph == null) {
082                            cachedGraph = (TokenStructure)flowChartinstance.getFlowChartRegistry(org.maltparser.core.syntaxgraph.TokenStructure.class, sourceName);
083                            writer.writeProlog();
084                    }
085                    writer.writeSentence(cachedGraph);
086                    if (signal == ChartItem.TERMINATE) {
087                            writer.writeEpilog();
088                    }
089                    return signal;
090            }
091            
092            public int postprocess(int signal) throws MaltChainedException {
093                    return signal;
094            }
095            
096            public void terminate() throws MaltChainedException {
097                    if (writer != null) {
098                            writer.close();
099                            writer = null;
100                    }
101                    outputDataFormatInstance = null;
102                    cachedGraph = null;
103            }
104            
105            public String getOutputFormatName() {
106                    if (outputFormatName == null) {
107                            return "/appdata/dataformat/conllx.xml";
108                    }
109                    return outputFormatName;
110            }
111    
112            public void setOutputFormatName(String outputFormatName) {
113                    this.outputFormatName = outputFormatName;
114            }
115    
116            public String getOutputFileName() {
117                    if (outputFileName == null) {
118                            return "/dev/stdout";
119                    }
120                    return outputFileName;
121            }
122    
123            public void setOutputFileName(String outputFileName) {
124                    this.outputFileName = outputFileName;
125            }
126    
127            public String getOutputCharSet() {
128                    if (outputCharSet == null) {
129                            return "UTF-8";
130                    }
131                    return outputCharSet;
132            }
133    
134            public void setOutputCharSet(String outputCharSet) {
135                    this.outputCharSet = outputCharSet;
136            }
137    
138            public String getWriterOptions() {
139                    if (writerOptions == null) {
140                            return "";
141                    }
142                    return writerOptions;
143            }
144    
145            public void setWriterOptions(String writerOptions) {
146                    this.writerOptions = writerOptions;
147            }
148    
149            public Class<? extends SyntaxGraphWriter> getSyntaxGraphWriterClass() {
150                    return graphWriterClass;
151            }
152    
153            public void setSyntaxGraphWriterClass(Class<?> graphWriterClass) throws MaltChainedException {
154                    try {
155                            if (graphWriterClass != null) {
156                                    this.graphWriterClass = graphWriterClass.asSubclass(org.maltparser.core.syntaxgraph.writer.SyntaxGraphWriter.class);
157                            }
158                    } catch (ClassCastException e) {
159                            throw new DataFormatException("The class '"+graphWriterClass.getName()+"' is not a subclass of '"+org.maltparser.core.syntaxgraph.writer.SyntaxGraphWriter.class.getName()+"'. ", e);
160                    }
161            }
162    
163            public String getNullValueStrategy() {
164                    if (nullValueStrategy == null) {
165                            return "one";
166                    }
167                    return nullValueStrategy;
168            }
169    
170            public void setNullValueStrategy(String nullValueStrategy) {
171                    this.nullValueStrategy = nullValueStrategy;
172            }
173    
174            public String getRootLabels() {
175                    if (nullValueStrategy == null) {
176                            return "ROOT";
177                    }
178                    return rootLabels;
179            }
180    
181            public void setRootLabels(String rootLabels) {
182                    this.rootLabels = rootLabels;
183            }
184            
185            
186            public void initOutput(String nullValueStategy, String rootLabels) throws MaltChainedException {
187                    ConfigurationDir configDir = (ConfigurationDir)flowChartinstance.getFlowChartRegistry(org.maltparser.core.config.ConfigurationDir.class, idName);
188                    DataFormatManager dataFormatManager = configDir.getDataFormatManager();
189    //              DataFormatManager dataFormatManager = flowChartinstance.getDataFormatManager();
190                    SymbolTableHandler symbolTables = configDir.getSymbolTables();
191    //              SymbolTableHandler symbolTables = flowChartinstance.getSymbolTables();
192                    HashMap<String, DataFormatInstance> dataFormatInstances = configDir.getDataFormatInstances();
193    //              HashMap<String, DataFormatInstance> dataFormatInstances = flowChartinstance.getDataFormatInstances();
194                    
195                    if (dataFormatInstances.size() == 0 || dataFormatManager.getInputDataFormatSpec() != dataFormatManager.getOutputDataFormatSpec()) {
196                            outputDataFormatInstance = dataFormatManager.getOutputDataFormatSpec().createDataFormatInstance(symbolTables, nullValueStategy, rootLabels);
197                            if (!dataFormatInstances.containsKey(dataFormatManager.getOutputDataFormatSpec().getDataFormatName())) {
198                                    dataFormatInstances.put(dataFormatManager.getOutputDataFormatSpec().getDataFormatName(), outputDataFormatInstance);
199                            }
200                    } else {
201                            outputDataFormatInstance = dataFormatInstances.get(dataFormatManager.getInputDataFormatSpec().getDataFormatName());
202                    }
203            }
204            
205            public void initWriter(Class<? extends SyntaxGraphWriter> syntaxGraphWriterClass, String outputFile, String outputCharSet, 
206                            String writerOption) throws MaltChainedException {
207                    try {   
208                            writer = syntaxGraphWriterClass.newInstance();
209                            if (outputFile == null || outputFile.length() == 0 || outputFile.equals("/dev/stdout")) {
210                                    writer.open(System.out, outputCharSet);
211                            } else {
212                                    writer.open(outputFile, outputCharSet);
213                            }
214                            writer.setDataFormatInstance(outputDataFormatInstance);
215                            writer.setOptions(writerOption);
216                    } catch (InstantiationException e) {
217                            throw new DataFormatException("The data writer '"+syntaxGraphWriterClass.getName()+"' cannot be initialized. ", e);
218                    } catch (IllegalAccessException e) {
219                            throw new DataFormatException("The data writer '"+syntaxGraphWriterClass.getName()+"' cannot be initialized. ", e);
220                    }
221            }
222    
223            public Class<? extends SyntaxGraphWriter> getGraphWriterClass() {
224                    return graphWriterClass;
225            }
226    
227            public SyntaxGraphWriter getWriter() {
228                    return writer;
229            }
230    
231            public String getSourceName() {
232                    return sourceName;
233            }
234    
235            public DataFormatInstance getOutputDataFormatInstance() {
236                    return outputDataFormatInstance;
237            }
238            
239            public boolean equals(Object obj) {
240                    if (this == obj)
241                            return true;
242                    if (obj == null)
243                            return false;
244                    if (getClass() != obj.getClass())
245                            return false;
246                    return obj.toString().equals(this.toString());
247            }
248            
249            public int hashCode() {
250                    return 217 + (null == toString() ? 0 : toString().hashCode());
251            }
252            
253            public String toString() {
254                    StringBuilder sb = new StringBuilder();
255                    sb.append("    write ");
256                    sb.append("id:");sb.append(idName);
257                    sb.append(' ');
258                    sb.append("source:");
259                    sb.append(sourceName);
260                    sb.append(' ');
261                    sb.append("optiongroup:");
262                    sb.append(optiongroupName);
263                    return sb.toString();
264            }
265    }