001    package org.maltparser.core.syntaxgraph.reader;
002    
003    import java.io.File;
004    import java.util.HashMap;
005    
006    import org.maltparser.core.config.ConfigurationDir;
007    import org.maltparser.core.exception.MaltChainedException;
008    import org.maltparser.core.flow.FlowChartInstance;
009    import org.maltparser.core.flow.item.ChartItem;
010    import org.maltparser.core.flow.spec.ChartItemSpecification;
011    import org.maltparser.core.helper.Util;
012    import org.maltparser.core.io.dataformat.DataFormatException;
013    import org.maltparser.core.io.dataformat.DataFormatInstance;
014    import org.maltparser.core.io.dataformat.DataFormatManager;
015    import org.maltparser.core.options.OptionManager;
016    import org.maltparser.core.symbol.SymbolTableHandler;
017    import org.maltparser.core.syntaxgraph.TokenStructure;
018    
019    public class ReadChartItem extends ChartItem {
020            private String idName;
021            private String inputFormatName;
022            private String inputFileName;
023            private String inputCharSet;
024            private String readerOptions;
025            private int iterations;
026            private Class<? extends SyntaxGraphReader> graphReaderClass;
027            
028            private String nullValueStrategy;
029            private String rootLabels;
030            
031            private SyntaxGraphReader reader;
032            private String targetName;
033            private String optiongroupName;
034            private DataFormatInstance inputDataFormatInstance;
035            private TokenStructure cachedGraph = null;
036            
037            public ReadChartItem() { super(); }
038    
039            public void initialize(FlowChartInstance flowChartinstance, ChartItemSpecification chartItemSpecification) throws MaltChainedException {
040                    super.initialize(flowChartinstance, chartItemSpecification);
041                    
042                    for (String key : chartItemSpecification.getChartItemAttributes().keySet()) {
043                            if (key.equals("id")) {
044                                    idName = chartItemSpecification.getChartItemAttributes().get(key);
045                            } else if (key.equals("target")) {
046                                    targetName = chartItemSpecification.getChartItemAttributes().get(key);
047                            } else if (key.equals("optiongroup")) {
048                                    optiongroupName = chartItemSpecification.getChartItemAttributes().get(key);
049                            }
050                    }
051                    
052                    if (idName == null) {
053                            idName = getChartElement("read").getAttributes().get("id").getDefaultValue();
054                    } else if (targetName == null) {
055                            targetName = getChartElement("read").getAttributes().get("target").getDefaultValue();
056                    } else if (optiongroupName == null) {
057                            optiongroupName = getChartElement("read").getAttributes().get("optiongroup").getDefaultValue();
058                    }
059                    
060                    setInputFormatName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "format").toString());
061                    setInputFileName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "infile").toString());
062                    setInputCharSet(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "charset").toString());
063                    setReaderOptions(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader_options").toString());
064                    if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "iterations") != null) {
065                            setIterations((Integer)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "iterations"));
066                    } else {
067                            setIterations(1);
068                    }
069                    setSyntaxGraphReaderClass((Class<?>)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader"));
070    
071                    setNullValueStrategy(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "singlemalt", "null_value").toString());
072                    setRootLabels(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "graph", "root_label").toString());
073                    
074                    
075                    initInput(getNullValueStrategy(), getRootLabels());
076                    initReader(getSyntaxGraphReaderClass(), getInputFileName(), getInputCharSet(), getReaderOptions(), iterations);
077            }
078            
079            public int preprocess(int signal) throws MaltChainedException {
080                    return signal;
081            }
082            
083            public int process(int signal) throws MaltChainedException {
084                    if (cachedGraph == null) {
085                            cachedGraph = (TokenStructure)flowChartinstance.getFlowChartRegistry(org.maltparser.core.syntaxgraph.TokenStructure.class, targetName);
086                    }
087                    int prevIterationCounter = reader.getIterationCounter();
088                    boolean moreInput = reader.readSentence(cachedGraph);
089    //              System.out.println(cachedGraph);
090    //              System.exit(1);
091                    if (!moreInput) {
092                            return ChartItem.TERMINATE;
093                    } else if (prevIterationCounter < reader.getIterationCounter()) {
094                            return ChartItem.NEWITERATION;
095                    }
096                    return ChartItem.CONTINUE;
097    //              return continueNextSentence && moreInput;
098            }
099            
100            public int postprocess(int signal) throws MaltChainedException {
101                    return signal;
102            }
103            
104            public void terminate() throws MaltChainedException {
105                    if (reader != null) {
106                            reader.close();
107                            reader = null;
108                    }
109                    cachedGraph = null;
110                    inputDataFormatInstance = null;
111            }
112            
113            public String getInputFormatName() {
114                    if (inputFormatName == null) {
115                            return "/appdata/dataformat/conllx.xml";
116                    }
117                    return inputFormatName;
118            }
119    
120            public void setInputFormatName(String inputFormatName) {
121                    this.inputFormatName = inputFormatName;
122            }
123    
124            public String getInputFileName() {
125                    if (inputFileName == null) {
126                            return "/dev/stdin";
127                    }
128                    return inputFileName;
129            }
130    
131            public void setInputFileName(String inputFileName) {
132                    this.inputFileName = inputFileName;
133            }
134    
135            public String getInputCharSet() {
136                    if (inputCharSet == null) {
137                            return "UTF-8";
138                    }
139                    return inputCharSet;
140            }
141    
142            public void setInputCharSet(String inputCharSet) {
143                    this.inputCharSet = inputCharSet;
144            }
145    
146            public String getReaderOptions() {
147                    if (readerOptions == null) {
148                            return "";
149                    }
150                    return readerOptions;
151            }
152    
153            public void setReaderOptions(String readerOptions) {
154                    this.readerOptions = readerOptions;
155            }
156    
157            
158            public int getIterations() {
159                    return iterations;
160            }
161    
162            public void setIterations(int iterations) {
163                    this.iterations = iterations;
164            }
165    
166            public Class<? extends SyntaxGraphReader> getSyntaxGraphReaderClass() {
167                    return graphReaderClass;
168            }
169    
170            public void setSyntaxGraphReaderClass(Class<?> graphReaderClass) throws MaltChainedException {
171                    try {
172                            if (graphReaderClass != null) {
173                                    this.graphReaderClass = graphReaderClass.asSubclass(org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class);
174                            }
175                    } catch (ClassCastException e) {
176                            throw new DataFormatException("The class '"+graphReaderClass.getName()+"' is not a subclass of '"+org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class.getName()+"'. ", e);
177                    }
178            }
179            
180            public String getNullValueStrategy() {
181                    if (nullValueStrategy == null) {
182                            return "one";
183                    }
184                    return nullValueStrategy;
185            }
186    
187            public void setNullValueStrategy(String nullValueStrategy) {
188                    this.nullValueStrategy = nullValueStrategy;
189            }
190    
191            public String getRootLabels() {
192                    if (nullValueStrategy == null) {
193                            return "ROOT";
194                    }
195                    return rootLabels;
196            }
197    
198            public void setRootLabels(String rootLabels) {
199                    this.rootLabels = rootLabels;
200            }
201            
202    
203            public String getTargetName() {
204                    return targetName;
205            }
206    
207            public void setTargetName(String targetName) {
208                    this.targetName = targetName;
209            }
210    
211            public SyntaxGraphReader getReader() {
212                    return reader;
213            }
214    
215            public DataFormatInstance getInputDataFormatInstance() {
216                    return inputDataFormatInstance;
217            }
218    
219            public void initInput(String nullValueStategy, String rootLabels) throws MaltChainedException {
220                    ConfigurationDir configDir = (ConfigurationDir)flowChartinstance.getFlowChartRegistry(org.maltparser.core.config.ConfigurationDir.class, idName);
221                    DataFormatManager dataFormatManager = configDir.getDataFormatManager();
222    //              DataFormatManager dataFormatManager = flowChartinstance.getDataFormatManager();
223                    SymbolTableHandler symbolTables = configDir.getSymbolTables();
224    //              SymbolTableHandler symbolTables = flowChartinstance.getSymbolTables();
225                    HashMap<String, DataFormatInstance> dataFormatInstances = configDir.getDataFormatInstances();
226    //              HashMap<String, DataFormatInstance> dataFormatInstances = flowChartinstance.getDataFormatInstances();
227                    
228                    inputDataFormatInstance = dataFormatManager.getInputDataFormatSpec().createDataFormatInstance(symbolTables, nullValueStategy, rootLabels);
229                    if (!dataFormatInstances.containsKey(dataFormatManager.getInputDataFormatSpec().getDataFormatName())) {
230                            dataFormatInstances.put(dataFormatManager.getInputDataFormatSpec().getDataFormatName(), inputDataFormatInstance);
231                    }
232            }
233            
234            public void initReader(Class<? extends SyntaxGraphReader> syntaxGraphReader, String inputFile, String inputCharSet, String readerOptions, int iterations) throws MaltChainedException {
235                    try {   
236                            reader = syntaxGraphReader.newInstance();
237                            if (inputFile == null || inputFile.length() == 0 || inputFile.equals("/dev/stdin")) {
238                                    reader.open(System.in, inputCharSet);
239                            } else if (new File(inputFile).exists()) {
240                                    reader.setNIterations(iterations);
241                                    reader.open(inputFile, inputCharSet);
242                            } else {
243                                    reader.setNIterations(iterations);
244                                    reader.open(Util.findURL(inputFile), inputCharSet);
245                            }
246                            reader.setDataFormatInstance(inputDataFormatInstance); 
247                            reader.setOptions(readerOptions);
248                    } catch (InstantiationException e) {
249                            throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e);
250                    } catch (IllegalAccessException e) {
251                            throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e);
252                    }       
253            }
254            
255            public boolean equals(Object obj) {
256                    if (this == obj)
257                            return true;
258                    if (obj == null)
259                            return false;
260                    if (getClass() != obj.getClass())
261                            return false;
262                    return obj.toString().equals(this.toString());
263            }
264            
265            public int hashCode() {
266                    return 217 + (null == toString() ? 0 : toString().hashCode());
267            }
268            
269            public String toString() {
270                    final StringBuilder sb = new StringBuilder();
271                    sb.append("    read ");
272                    sb.append("id:");sb.append(idName);
273                    sb.append(' ');
274                    sb.append("target:");
275                    sb.append(targetName);
276                    sb.append(' ');
277                    sb.append("optiongroup:");
278                    sb.append(optiongroupName);
279                    return sb.toString();
280            }
281    }