001    package org.maltparser.parser;
002    
003    import java.io.File;
004    import java.io.IOException;
005    import java.util.Formatter;
006    import java.util.regex.Pattern;
007    
008    import org.apache.log4j.FileAppender;
009    import org.apache.log4j.Level;
010    import org.apache.log4j.Logger;
011    import org.apache.log4j.PatternLayout;
012    import org.maltparser.core.config.ConfigurationDir;
013    import org.maltparser.core.config.ConfigurationException;
014    import org.maltparser.core.config.ConfigurationRegistry;
015    import org.maltparser.core.exception.MaltChainedException;
016    import org.maltparser.core.helper.SystemLogger;
017    import org.maltparser.core.helper.Util;
018    import org.maltparser.core.io.dataformat.DataFormatInstance;
019    import org.maltparser.core.options.OptionManager;
020    import org.maltparser.core.propagation.PropagationManager;
021    import org.maltparser.core.symbol.SymbolTableHandler;
022    import org.maltparser.core.syntaxgraph.DependencyStructure;
023    import org.maltparser.parser.guide.ClassifierGuide;
024    
025    /**
026     * @author Johan Hall
027     *
028     */
029    public class SingleMalt implements DependencyParserConfig {
030            public static final int LEARN = 0;
031            public static final int PARSE = 1;
032            protected ConfigurationDir configDir;
033            protected Logger configLogger;
034            protected int optionContainerIndex;
035            protected Algorithm parsingAlgorithm = null;
036            protected int mode;
037            protected ConfigurationRegistry registry;
038            protected SymbolTableHandler symbolTableHandler;
039            protected long startTime;
040            protected long endTime;
041            protected int nIterations = 0;
042            protected PropagationManager propagationManager;
043            
044            public void initialize(int containerIndex, DataFormatInstance dataFormatInstance, ConfigurationDir configDir, int mode) throws MaltChainedException {
045    
046                    this.optionContainerIndex = containerIndex;
047                    this.mode = mode;
048                    setConfigurationDir(configDir);
049                    startTime = System.currentTimeMillis();
050                    configLogger = initConfigLogger(getOptionValue("config", "logfile").toString(), getOptionValue("config", "logging").toString());
051                    registry = new ConfigurationRegistry();
052                    symbolTableHandler = dataFormatInstance.getSymbolTables();
053    
054                    if (mode == SingleMalt.LEARN) {
055                            checkOptionDependency();
056                    }
057                    registry.put(org.maltparser.core.symbol.SymbolTableHandler.class, getSymbolTables());
058                    registry.put(org.maltparser.core.io.dataformat.DataFormatInstance.class, dataFormatInstance);
059    //              registry.put(org.maltparser.parser.DependencyParserConfig.class, this);
060                    initPropagation();
061                    initParsingAlgorithm(); 
062                    
063            }
064            
065            private void initPropagation()  throws MaltChainedException {
066                    String propagationSpecFileName = getOptionValue("singlemalt", "propagation").toString();
067                    if (propagationSpecFileName == null || propagationSpecFileName.length() == 0) {
068                            return;
069                    }
070                    propagationManager = new PropagationManager(configDir, symbolTableHandler);
071                    if (mode == SingleMalt.LEARN) {
072                            propagationSpecFileName = configDir.copyToConfig(propagationSpecFileName);
073                            OptionManager.instance().overloadOptionValue(optionContainerIndex, "singlemalt", "propagation", propagationSpecFileName);
074                    }
075                    getConfigLogger().info("  Propagation          : " + propagationSpecFileName+"\n");
076                    propagationManager.loadSpecification(propagationSpecFileName);
077            }
078            
079            /**
080             * Initialize the parsing algorithm
081             * 
082             * @throws MaltChainedException
083             */
084            protected void initParsingAlgorithm() throws MaltChainedException {
085                    if (mode == LEARN) {
086                            parsingAlgorithm = new BatchTrainer(this);
087                    } else if (mode == PARSE) {
088                            parsingAlgorithm = new DeterministicParser(this);
089                    }
090            }
091            
092            public void addRegistry(Class<?> clazz, Object o) {
093                    registry.put(clazz, o);
094            }
095            
096            public void process(Object[] arguments) throws MaltChainedException {
097                    if (mode == LEARN) {
098                            if (arguments.length < 2 || !(arguments[0] instanceof DependencyStructure) || !(arguments[1] instanceof DependencyStructure)) {
099                                    throw new MaltChainedException("The single malt learn task must be supplied with at least two dependency structures. ");
100                            }
101                            DependencyStructure systemGraph = (DependencyStructure)arguments[0];
102                            DependencyStructure goldGraph = (DependencyStructure)arguments[1];
103                            if (systemGraph.hasTokens() && getGuide() != null) {
104                                    getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, systemGraph));
105                            }
106                    } else if (mode == PARSE) {
107                            if (arguments.length < 1 || !(arguments[0] instanceof DependencyStructure)) {
108                                    throw new MaltChainedException("The single malt parse task must be supplied with at least one input terminal structure and one output dependency structure. ");
109                            }
110                            DependencyStructure processGraph = (DependencyStructure)arguments[0];
111                            if (processGraph.hasTokens()) {
112                                    ((Parser)getAlgorithm()).parse(processGraph);
113                            }
114                    }
115            }
116            
117            public void parse(DependencyStructure graph) throws MaltChainedException {
118                    if (graph.hasTokens()) {
119                            ((Parser)getAlgorithm()).parse(graph);
120                    }
121            }
122            
123            public void oracleParse(DependencyStructure goldGraph, DependencyStructure oracleGraph) throws MaltChainedException {
124                    if (oracleGraph.hasTokens()) {
125                            if (getGuide() != null) {
126                                    getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, oracleGraph));
127                            } else {
128                                    ((Trainer)getAlgorithm()).parse(goldGraph, oracleGraph);
129                            }
130                    }
131            }
132            
133            public void train() throws MaltChainedException {
134                    if (getGuide() == null) {
135                            ((Trainer)getAlgorithm()).train();
136                    }
137            }
138            
139            public void terminate(Object[] arguments) throws MaltChainedException {
140    //              if (getAlgorithm() instanceof Trainer) {
141    //                      ((Trainer)getAlgorithm()).terminate();
142    //              }
143                    getAlgorithm().terminate();
144                    if (getGuide() != null) {
145                            getGuide().terminate();
146                    }
147                    if (mode == LEARN) {
148                            endTime = System.currentTimeMillis();
149                            long elapsed = endTime - startTime;
150                            if (configLogger.isInfoEnabled()) {
151                                    configLogger.info("Learning time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
152                            }
153                    } else if (mode == PARSE) {
154                            endTime = System.currentTimeMillis();
155                            long elapsed = endTime - startTime;
156                            if (configLogger.isInfoEnabled()) {
157                                    configLogger.info("Parsing time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
158                            }
159                    }
160                    if (SystemLogger.logger() != configLogger && configLogger != null) {
161                            configLogger.removeAllAppenders();
162                    }
163            }
164            
165            /**
166             * Initialize the configuration logger
167             * 
168             * @return the configuration logger
169             * @throws MaltChainedException
170             */
171            public Logger initConfigLogger(String logfile, String level) throws MaltChainedException {
172                    if (logfile != null && logfile.length() > 0 && !logfile.equalsIgnoreCase("stdout") && configDir != null) {
173                            configLogger = Logger.getLogger(logfile);
174                            FileAppender fileAppender = null;
175                            try {
176                                    fileAppender = new FileAppender(new PatternLayout("%m"),configDir.getWorkingDirectory().getPath()+File.separator+logfile, true);
177                            } catch(IOException e) {
178                                    throw new ConfigurationException("It is not possible to create a configuration log file. ", e);
179                            }
180                            fileAppender.setThreshold(Level.toLevel(level, Level.INFO));
181                            configLogger.addAppender(fileAppender);
182                            configLogger.setLevel(Level.toLevel(level, Level.INFO));        
183                    } else {
184                            configLogger = SystemLogger.logger();
185                    }
186    
187                    return configLogger;
188            }
189            
190            public Logger getConfigLogger() {
191                    return configLogger;
192            }
193    
194            public void setConfigLogger(Logger logger) {
195                    configLogger = logger;
196            }
197            
198            public ConfigurationDir getConfigurationDir() {
199                    return configDir;
200            }
201            
202            public void setConfigurationDir(ConfigurationDir configDir) {
203                    this.configDir = configDir;
204            }
205            
206            public int getMode() {
207                    return mode;
208            }
209            
210            public ConfigurationRegistry getRegistry() {
211                    return registry;
212            }
213    
214            public void setRegistry(ConfigurationRegistry registry) {
215                    this.registry = registry;
216            }
217    
218            public Object getOptionValue(String optiongroup, String optionname) throws MaltChainedException {
219                    return OptionManager.instance().getOptionValue(optionContainerIndex, optiongroup, optionname);
220            }
221            
222            public String getOptionValueString(String optiongroup, String optionname) throws MaltChainedException {
223                    return OptionManager.instance().getOptionValueString(optionContainerIndex, optiongroup, optionname);
224            }
225            
226            public OptionManager getOptionManager() throws MaltChainedException {
227                    return OptionManager.instance();
228            }
229            /******************************** MaltParserConfiguration specific  ********************************/
230            
231            /**
232             * Returns the list of symbol tables
233             * 
234             * @return the list of symbol tables
235             */
236            public SymbolTableHandler getSymbolTables() {
237                    return symbolTableHandler;
238            }
239            
240            public PropagationManager getPropagationManager() {
241                    return propagationManager;
242            }
243    
244            public Algorithm getAlgorithm() {
245                    return parsingAlgorithm;
246            }
247            /**
248             * Returns the guide
249             * 
250             * @return the guide
251             */
252            public ClassifierGuide getGuide() {
253                    return parsingAlgorithm.getGuide();
254            }
255            
256            public void checkOptionDependency() throws MaltChainedException {
257                    try {
258                            if (configDir.getInfoFileWriter() != null) {
259                                    configDir.getInfoFileWriter().write("\nDEPENDENCIES\n");
260                            }
261                            
262                            // Copy the feature model file into the configuration directory
263                            String featureModelFileName = getOptionValue("guide", "features").toString().trim();
264                            if (featureModelFileName.equals("")) {
265                                    // use default feature model depending on the selected parser algorithm
266                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", getOptionValueString("singlemalt", "parsing_algorithm"));
267                                    featureModelFileName = getOptionValue("guide", "features").toString().trim();
268                                    featureModelFileName = featureModelFileName.replace("{learner}", getOptionValueString("guide", "learner"));
269                                    featureModelFileName = configDir.copyToConfig(Util.findURLinJars(featureModelFileName));
270                            } else {
271                                    featureModelFileName = configDir.copyToConfig(featureModelFileName);
272                            }
273                            OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", featureModelFileName);
274                            if (configDir.getInfoFileWriter() != null) {
275                                    configDir.getInfoFileWriter().write("--guide-features (  -F)                 "+getOptionValue("guide", "features").toString()+"\n");
276                            }
277    
278                            if (getOptionValue("guide", "data_split_column").toString().equals("") && !getOptionValue("guide", "data_split_structure").toString().equals("")) {
279                                    configLogger.warn("Option --guide-data_split_column = '' and --guide-data_split_structure != ''. Option --guide-data_split_structure is overloaded with '', this will cause the parser to induce a single model.\n ");
280                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_structure", "");
281                                    if (configDir.getInfoFileWriter() != null) {
282                                            configDir.getInfoFileWriter().write("--guide-data_split_structure (  -s)\n");
283                                    }
284                            }
285                            if (!getOptionValue("guide", "data_split_column").toString().equals("") && getOptionValue("guide", "data_split_structure").toString().equals("")) {
286                                    configLogger.warn("Option --guide-data_split_column != '' and --guide-data_split_structure = ''. Option --guide-data_split_column is overloaded with '', this will cause the parser to induce a single model.\n");
287                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_column", "");
288                                    if (configDir.getInfoFileWriter() != null) {
289                                            configDir.getInfoFileWriter().write("--guide-data_split_column (  -d)\n");
290                                    }
291                            }
292                            
293                            String decisionSettings = getOptionValue("guide", "decision_settings").toString().trim();
294                            String markingStrategy = getOptionValue("pproj", "marking_strategy").toString().trim();
295                            String coveredRoot = getOptionValue("pproj", "covered_root").toString().trim();
296                            StringBuilder newDecisionSettings = new StringBuilder();
297    //                      if ((Boolean)getOptionValue("malt0.4", "behavior") == true) {
298    //                              decisionSettings = "T.TRANS+A.DEPREL";
299    //                      }
300                            if (decisionSettings == null || decisionSettings.length() < 1 || decisionSettings.equals("default")) {
301                                    decisionSettings = "T.TRANS+A.DEPREL";
302                            } else {
303                                    decisionSettings = decisionSettings.toUpperCase();
304                            }
305                            
306                            if (markingStrategy.equalsIgnoreCase("head") || markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
307                                    if (!Pattern.matches(".*A\\.PPLIFTED.*", decisionSettings)) {
308                                            newDecisionSettings.append("+A.PPLIFTED");
309                                    }
310                            }
311                            if (markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
312                                    if (!Pattern.matches(".*A\\.PPPATH.*", decisionSettings)) {
313                                            newDecisionSettings.append("+A.PPPATH");
314                                    }
315                            }
316                            if (!coveredRoot.equalsIgnoreCase("none") && !Pattern.matches(".*A\\.PPCOVERED.*", decisionSettings)) {
317                                    newDecisionSettings.append("+A.PPCOVERED");
318                            }
319                            if (!getOptionValue("guide", "decision_settings").toString().equals(decisionSettings) || newDecisionSettings.length() > 0) {
320                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "decision_settings", decisionSettings+newDecisionSettings.toString());
321                                    if (configDir.getInfoFileWriter() != null) {
322                                            configDir.getInfoFileWriter().write("--guide-decision_settings (  -gds)                 "+getOptionValue("guide", "decision_settings").toString()+"\n");
323                                    }
324                            }
325                            if (configDir.getInfoFileWriter() != null) {
326                                    configDir.getInfoFileWriter().flush();
327                            }
328                    } catch (IOException e) {
329                            throw new ConfigurationException("Could not write to the configuration information file. ", e);
330                    }
331            }
332    }