001    package org.maltparser.ml.liblinear;
002    
003    import java.io.BufferedReader;
004    import java.io.BufferedWriter;
005    import java.io.File;
006    import java.io.FileNotFoundException;
007    import java.io.IOException;
008    import java.io.InputStream;
009    import java.io.InputStreamReader;
010    import java.io.OutputStreamWriter;
011    import java.io.PrintStream;
012    import java.util.ArrayList;
013    import java.util.HashMap;
014    import java.util.LinkedHashMap;
015    import java.util.Map;
016    import java.util.Set;
017    import java.util.jar.JarEntry;
018    import java.util.regex.Pattern;
019    import java.util.regex.PatternSyntaxException;
020    
021    import liblinear.FeatureNode;
022    import liblinear.Linear;
023    import liblinear.Model;
024    import liblinear.Parameter;
025    import liblinear.Problem;
026    import liblinear.SolverType;
027    
028    
029    
030    import org.maltparser.core.exception.MaltChainedException;
031    import org.maltparser.core.feature.FeatureVector;
032    import org.maltparser.core.feature.function.FeatureFunction;
033    import org.maltparser.core.feature.value.FeatureValue;
034    import org.maltparser.core.feature.value.MultipleFeatureValue;
035    import org.maltparser.core.feature.value.SingleFeatureValue;
036    import org.maltparser.core.helper.NoPrintStream;
037    import org.maltparser.core.syntaxgraph.DependencyStructure;
038    import org.maltparser.ml.LearningMethod;
039    import org.maltparser.parser.DependencyParserConfig;
040    import org.maltparser.parser.guide.instance.InstanceModel;
041    import org.maltparser.parser.history.action.SingleDecision;
042    import org.maltparser.parser.history.kbest.KBestList;
043    import org.maltparser.parser.history.kbest.ScoredKBestList;
044    
045    
046    public class Liblinear implements LearningMethod {
047            public final static String LIBLINEAR_VERSION = "1.51";
048            public enum Verbostity {
049                    SILENT, ERROR, ALL
050            }
051            private LinkedHashMap<String, String> liblinearOptions;
052             
053            protected InstanceModel owner;
054            protected int learnerMode;
055            protected String name;
056            protected int numberOfInstances;
057            protected boolean saveInstanceFiles;
058            protected boolean excludeNullValues;
059            protected String pathExternalLiblinearTrain = null;
060            private int[] cardinalities;
061            /**
062             * Instance output stream writer 
063             */
064            private BufferedWriter instanceOutput = null; 
065            /**
066             * Liblinear model object, only used during classification.
067             */
068            private Model model = null;
069            
070            /**
071             * Parameter string
072             */
073            private String paramString;
074    
075            private ArrayList<FeatureNode> xlist = null;
076    
077            private Verbostity verbosity;
078            /**
079             * Constructs a Liblinear learner.
080             * 
081             * @param owner the guide model owner
082             * @param learnerMode the mode of the learner TRAIN or CLASSIFY
083             */
084            public Liblinear(InstanceModel owner, Integer learnerMode) throws MaltChainedException {
085                    setOwner(owner);
086                    setLearningMethodName("liblinear");
087                    setLearnerMode(learnerMode.intValue());
088                    setNumberOfInstances(0);
089                    verbosity = Verbostity.SILENT;
090    
091                    liblinearOptions = new LinkedHashMap<String, String>();
092                    initLiblinearOptions();
093                    parseParameters(getConfiguration().getOptionValue("liblinear", "liblinear_options").toString());
094                    initSpecialParameters();
095                    if (learnerMode == BATCH) {
096    //                      if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) {
097    //                              if (pathExternalLiblinearTrain != null) {
098    //                                      owner.getGuide().getConfiguration().getConfigLogger().info("  Learner              : Liblinear external "+ getLibLinearOptions() + "\n");
099    //                              } else {
100    //                                      owner.getGuide().getConfiguration().getConfigLogger().info("  Learner              : Liblinear "+LIBLINEAR_VERSION+" "+ getLibLinearOptions() + "\n");
101    //                              }
102    //                      }
103                            instanceOutput = new BufferedWriter(getInstanceOutputStreamWriter(".ins"));
104                    } 
105    //              else {
106    //                      if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) {
107    //                              owner.getGuide().getConfiguration().getConfigLogger().info("  Classifier           : Liblinear "+LIBLINEAR_VERSION+" "+ getLibLinearOptions()+ "\n");
108    //                      }
109    //              }
110            }
111            
112            
113            public void addInstance(SingleDecision decision, FeatureVector featureVector) throws MaltChainedException {
114                    if (featureVector == null) {
115                            throw new LiblinearException("The feature vector cannot be found");
116                    } else if (decision == null) {
117                            throw new LiblinearException("The decision cannot be found");
118                    }       
119                    try {
120                            instanceOutput.write(decision.getDecisionCode()+"\t");
121                            for (int i = 0; i < featureVector.size(); i++) {
122                                    FeatureValue featureValue = featureVector.get(i).getFeatureValue();
123                                    if (excludeNullValues == true && featureValue.isNullValue()) {
124                                            instanceOutput.write("-1");
125                                    } else {
126                                            if (featureValue instanceof SingleFeatureValue) {
127                                                    instanceOutput.write(((SingleFeatureValue)featureValue).getCode()+"");
128                                            } else if (featureValue instanceof MultipleFeatureValue) {
129                                                    Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes();
130                                                    int j=0;
131                                                    for (Integer value : values) {
132                                                            instanceOutput.write(value.toString());
133                                                            if (j != values.size()-1) {
134                                                                    instanceOutput.write("|");
135                                                            }
136                                                            j++;
137                                                    }
138                                            }
139                                    }
140                                    if (i != featureVector.size()) {
141                                            instanceOutput.write('\t');
142                                    }
143                            }
144    
145                            instanceOutput.write('\n');
146                            instanceOutput.flush();
147                            increaseNumberOfInstances();
148                    } catch (IOException e) {
149                            throw new LiblinearException("The Liblinear learner cannot write to the instance file. ", e);
150                    }
151            }
152            
153            public void finalizeSentence(DependencyStructure dependencyGraph) throws MaltChainedException { }
154            
155            /* (non-Javadoc)
156             * @see org.maltparser.ml.LearningMethod#noMoreInstances()
157             */
158            public void noMoreInstances() throws MaltChainedException {
159                    closeInstanceWriter();
160            }
161    
162    
163            /* (non-Javadoc)
164             * @see org.maltparser.ml.LearningMethod#train(org.maltparser.parser.guide.feature.FeatureVector)
165             */
166            public void train(FeatureVector featureVector) throws MaltChainedException {
167                    if (featureVector == null) {
168                            throw new LiblinearException("The feature vector cannot be found. ");
169                    } else if (owner == null) {
170                            throw new LiblinearException("The parent guide model cannot be found. ");
171                    }
172                    cardinalities = getCardinalities(featureVector);
173                    if (pathExternalLiblinearTrain == null) {
174                            try {
175                                    final Problem problem = readLibLinearProblem(getInstanceInputStreamReader(".ins"), cardinalities);
176                                    if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) {
177                                            owner.getGuide().getConfiguration().getConfigLogger().info("Creating Liblinear model "+getFile(".mod").getName()+"\n");
178                                    }
179                                    final PrintStream out = System.out;
180                                    final PrintStream err = System.err;
181                                    System.setOut(NoPrintStream.NO_PRINTSTREAM);
182                                    System.setErr(NoPrintStream.NO_PRINTSTREAM);
183                                    Linear.saveModel(new File(getFile(".mod").getAbsolutePath()), Linear.train(problem, getLiblinearParameters()));
184                                    System.setOut(err);
185                                    System.setOut(out);
186                                    if (!saveInstanceFiles) {
187                                            getFile(".ins").delete();
188                                    }
189                            } catch (OutOfMemoryError e) {
190                                    throw new LiblinearException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
191                            } catch (IllegalArgumentException e) {
192                                    throw new LiblinearException("The Liblinear learner was not able to redirect Standard Error stream. ", e);
193                            } catch (SecurityException e) {
194                                    throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
195                            } catch (IOException e) {
196                                    throw new LiblinearException("The Liblinear learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
197                            }
198                    } else {
199                            trainExternal(featureVector);
200                    }
201                    saveCardinalities(getInstanceOutputStreamWriter(".car"), cardinalities);
202            }
203            
204            @Override
205            public double crossValidate(FeatureVector featureVector, int nrOfSplits)
206                            throws MaltChainedException {
207                    if (featureVector == null) {
208                            throw new LiblinearException("The feature vector cannot be found. ");
209                    } else if (owner == null) {
210                            throw new LiblinearException("The parent guide model cannot be found. ");
211                    }
212                    
213                    cardinalities = getCardinalities(featureVector);
214                    
215                    double crossValidationAccuracy = 0.0;
216                    
217                    //if (pathExternalLiblinearTrain == null) {
218                            try {
219                                    final Problem problem = readLibLinearProblem(getInstanceInputStreamReader(".ins"), cardinalities);
220                                    if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) {
221                                            owner.getGuide().getConfiguration().getConfigLogger().info("Doing cross validation for model "+ owner.getModelName() + "\n");
222                                    }
223                                    final PrintStream out = System.out;
224                                    final PrintStream err = System.err;
225                                    System.setOut(NoPrintStream.NO_PRINTSTREAM);
226                                    System.setErr(NoPrintStream.NO_PRINTSTREAM);
227    
228                                    int[] target = new int[problem.l];
229                                    
230                                    Linear.crossValidation(problem, getLiblinearParameters(), nrOfSplits, target);
231    
232                                    double totalCorrect = 0;
233                                    for (int i = 0; i < problem.l; i++)
234                                            if (target[i] == problem.y[i]) ++totalCorrect;
235    
236                                    if(totalCorrect>0)
237                                            crossValidationAccuracy = 100.0 * totalCorrect / problem.l;
238                                                                    
239                                    System.setOut(err);
240                                    System.setOut(out);
241                                    //Don't delete the instance file here
242                                    //if (!saveInstanceFiles) {
243                                    //      getFile(".ins").delete();
244                                    //}
245                            } catch (OutOfMemoryError e) {
246                                    throw new LiblinearException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
247                            } catch (IllegalArgumentException e) {
248                                    throw new LiblinearException("The Liblinear learner was not able to redirect Standard Error stream. ", e);
249                            } catch (SecurityException e) {
250                                    throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
251                            }
252                    //} else {
253                    //      trainExternal(featureVector);
254                    //}
255    
256                    return crossValidationAccuracy;
257            }
258            
259            private void trainExternal(FeatureVector featureVector) throws MaltChainedException {
260                    try {           
261                            maltSVMFormat2OriginalSVMFormat(getInstanceInputStreamReader(".ins"), getInstanceOutputStreamWriter(".ins.tmp"), cardinalities);
262                            owner.getGuide().getConfiguration().getConfigLogger().info("Creating Liblinear model (external) "+getFile(".mod").getName());
263    
264                            final String[] params = getLibLinearParamStringArray();
265                            String[] arrayCommands = new String[params.length+3];
266                            int i = 0;
267                            arrayCommands[i++] = pathExternalLiblinearTrain;
268                            for (; i <= params.length; i++) {
269                                    arrayCommands[i] = params[i-1];
270                            }
271                            arrayCommands[i++] = getFile(".ins.tmp").getAbsolutePath();
272                            arrayCommands[i++] = getFile(".mod").getAbsolutePath();
273                            
274                    if (verbosity == Verbostity.ALL) {
275                            owner.getGuide().getConfiguration().getConfigLogger().info('\n');
276                    }
277                            final Process child = Runtime.getRuntime().exec(arrayCommands);
278                    final InputStream in = child.getInputStream();
279                    final InputStream err = child.getErrorStream();
280                    int c;
281                    while ((c = in.read()) != -1){
282                            if (verbosity == Verbostity.ALL) {
283                                    owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
284                            }
285                    }
286                    while ((c = err.read()) != -1){
287                            if (verbosity == Verbostity.ALL || verbosity == Verbostity.ERROR) {
288                                    owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
289                            }
290                    }
291                if (child.waitFor() != 0) {
292                    owner.getGuide().getConfiguration().getConfigLogger().info(" FAILED ("+child.exitValue()+")");
293                }
294                    in.close();
295                    err.close();
296                    if (!saveInstanceFiles) {
297                                    getFile(".ins").delete();
298                                    getFile(".ins.tmp").delete();
299                    }
300                    owner.getGuide().getConfiguration().getConfigLogger().info('\n');
301                    } catch (InterruptedException e) {
302                             throw new LiblinearException("Liblinear is interrupted. ", e);
303                    } catch (IllegalArgumentException e) {
304                            throw new LiblinearException("The Liblinear learner was not able to redirect Standard Error stream. ", e);
305                    } catch (SecurityException e) {
306                            throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
307                    } catch (IOException e) {
308                            throw new LiblinearException("The Liblinear learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
309                    } catch (OutOfMemoryError e) {
310                            throw new LiblinearException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
311                    }
312            }
313            
314            private int[] getCardinalities(FeatureVector featureVector) {
315                    int[] cardinalities = new int[featureVector.size()];
316                    int i = 0;
317                    for (FeatureFunction feature : featureVector) {
318                            cardinalities[i++] = feature.getFeatureValue().getCardinality();
319                    }
320                    return cardinalities;
321            }
322            
323            private void saveCardinalities(OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException {
324                    final BufferedWriter out = new BufferedWriter(osw);
325                    try {
326                            for (int i = 0, n = cardinalities.length; i < n; i++) {
327                                    out.write(Integer.toString(cardinalities[i]));
328                                    if (i < n - 1) {
329                                            out.write(',');
330                                    }
331                            }
332                            out.write('\n');
333                            out.close();
334                    } catch (IOException e) {
335                            throw new LiblinearException("", e);
336                    }
337            }
338            
339            private int[] loadCardinalities(InputStreamReader isr) throws MaltChainedException {
340                    int[] cardinalities = null;
341                    try {
342                            final BufferedReader in = new BufferedReader(isr); 
343                            String line;
344                            if ((line = in.readLine()) != null) {
345                                    String[] items = line.split(",");
346                                    cardinalities = new int[items.length];
347                                    for (int i = 0; i < items.length; i++) {
348                                            cardinalities[i] = Integer.parseInt(items[i]);
349                                    }
350                            }
351                            in.close();
352                    } catch (IOException e) {
353                            throw new LiblinearException("", e);
354                    } catch (NumberFormatException e) {
355                            throw new LiblinearException("", e);
356                    }
357                    return cardinalities;
358            }
359            
360            /* (non-Javadoc)
361             * @see org.maltparser.ml.LearningMethod#moveAllInstances(org.maltparser.ml.LearningMethod, org.maltparser.core.feature.function.FeatureFunction, java.util.ArrayList)
362             */
363            public void moveAllInstances(LearningMethod method, FeatureFunction divideFeature, ArrayList<Integer> divideFeatureIndexVector) throws MaltChainedException {
364                    if (method == null) {
365                            throw new LiblinearException("The learning method cannot be found. ");
366                    } else if (divideFeature == null) {
367                            throw new LiblinearException("The divide feature cannot be found. ");
368                    } 
369                    
370                    try {
371                            final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins"));
372                            final BufferedWriter out = method.getInstanceWriter();
373                            final StringBuilder sb = new StringBuilder(6);
374                            int l = in.read();
375                            char c;
376                            int j = 0;
377            
378                            while(true) {
379                                    if (l == -1) {
380                                            sb.setLength(0);
381                                            break;
382                                    }
383                                    c = (char)l; 
384                                    l = in.read();
385                                    if (c == '\t') {
386                                            if (divideFeatureIndexVector.contains(j-1)) {
387                                                    out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode()));
388                                                    out.write('\t');
389                                            }
390                                            out.write(sb.toString());
391                                            j++;
392                                            out.write('\t');
393                                            sb.setLength(0);
394                                    } else if (c == '\n') {
395                                            out.write(sb.toString());
396                                            if (divideFeatureIndexVector.contains(j-1)) {
397                                                    out.write('\t');
398                                                    out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode()));
399                                            }
400                                            out.write('\n');
401                                            sb.setLength(0);
402                                            method.increaseNumberOfInstances();
403                                            this.decreaseNumberOfInstances();
404                                            j = 0;
405                                    } else {
406                                            sb.append(c);
407                                    }
408                            }       
409                            in.close();
410                            getFile(".ins").delete();
411                            out.flush();
412                    } catch (SecurityException e) {
413                            throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
414                    } catch (NullPointerException  e) {
415                            throw new LiblinearException("The instance file cannot be found. ", e);
416                    } catch (FileNotFoundException e) {
417                            throw new LiblinearException("The instance file cannot be found. ", e);
418                    } catch (IOException e) {
419                            throw new LiblinearException("The Liblinear learner read from the instance file. ", e);
420                    }
421    
422            }
423            
424            /* (non-Javadoc)
425             * @see org.maltparser.ml.LearningMethod#predict(org.maltparser.parser.guide.feature.FeatureVector, org.maltparser.ml.KBestList)
426             */
427            public boolean predict(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException {
428                    
429                    if (model == null) {
430                            try {
431                                    model = Linear.loadModel(new BufferedReader(getInstanceInputStreamReaderFromConfigFile(".mod")));
432                            } catch (IOException e) {
433                                    throw new LiblinearException("The model cannot be loaded. ", e);
434                            }
435                    }
436    
437                    if (cardinalities == null) {
438                            if (getConfigFileEntry(".car") != null) {
439                                    cardinalities = loadCardinalities(getInstanceInputStreamReaderFromConfigFile(".car"));
440                            } else {
441                                    cardinalities = getCardinalities(featureVector);
442                            }
443                    }
444                    //System.out.println("METHOD PREDICT CARDINALITIES SIZE" + cardinalities.length + " FEATURE VECTOR SIZE " +featureVector.size());
445                    if (xlist == null) {
446                            xlist = new ArrayList<FeatureNode>(featureVector.size()); 
447                    }
448                    if (model == null) { 
449                            throw new LiblinearException("The Liblinear learner cannot predict the next class, because the learning model cannot be found. ");
450                    } else if (featureVector == null) {
451                            throw new LiblinearException("The Liblinear learner cannot predict the next class, because the feature vector cannot be found. ");
452                    }
453                    int j = 0;
454                    int offset = 1;
455                    int i = 0;
456                    for (FeatureFunction feature : featureVector) {
457                            final FeatureValue featureValue = feature.getFeatureValue();
458                            if (!(excludeNullValues == true && featureValue.isNullValue())) {
459                                    if (featureValue instanceof SingleFeatureValue) {
460                                            if (((SingleFeatureValue)featureValue).getCode() < cardinalities[i]) {
461                                                    xlist.add(j++, new FeatureNode(((SingleFeatureValue)featureValue).getCode() + offset, 1));
462                                            }
463                                    } else if (featureValue instanceof MultipleFeatureValue) {
464                                            for (Integer value : ((MultipleFeatureValue)featureValue).getCodes()) {
465                                                    if (value < cardinalities[i]) {
466                                                            xlist.add(j++, new FeatureNode(value + offset, 1));
467                                                    }
468                                            }
469                                    }
470                            }
471                            offset += cardinalities[i];
472                            i++;
473                    }
474                    
475                    FeatureNode[] xarray = new FeatureNode[j];
476                    for (int k = 0; k < j; k++) {
477                            xarray[k] = xlist.get(k);
478                    }
479    
480                    if (decision.getKBestList().getK() == 1) {
481                            decision.getKBestList().add(Linear.predict(model, xarray));
482                    } else {
483                            liblinear_predict_with_kbestlist(model, xarray, decision.getKBestList());
484                    }
485                    
486                    xlist.clear();
487    
488                    return true;
489            }
490            
491    
492            public void terminate() throws MaltChainedException { 
493                    closeInstanceWriter();
494                    model = null;
495                    xlist = null;
496                    owner = null;
497            }
498    
499            public BufferedWriter getInstanceWriter() {
500                    return instanceOutput;
501            }
502            
503            protected void closeInstanceWriter() throws MaltChainedException {
504                    try {
505                            if (instanceOutput != null) {
506                                    instanceOutput.flush();
507                                    instanceOutput.close();
508                                    instanceOutput = null;
509                            }
510                    } catch (IOException e) {
511                            throw new LiblinearException("The Liblinear learner cannot close the instance file. ", e);
512                    }
513            }
514            
515            
516            /**
517             * Returns the parameter string for used for configure Liblinear
518             * 
519             * @return the parameter string for used for configure Liblinear
520             */
521            public String getParamString() {
522                    return paramString;
523            }
524            
525            public InstanceModel getOwner() {
526                    return owner;
527            }
528    
529            protected void setOwner(InstanceModel owner) {
530                    this.owner = owner;
531            }
532            
533            public int getLearnerMode() {
534                    return learnerMode;
535            }
536    
537            public void setLearnerMode(int learnerMode) throws MaltChainedException {
538                    this.learnerMode = learnerMode;
539            }
540            
541            public String getLearningMethodName() {
542                    return name;
543            }
544            
545            /**
546             * Returns the current configuration
547             * 
548             * @return the current configuration
549             * @throws MaltChainedException
550             */
551            public DependencyParserConfig getConfiguration() throws MaltChainedException {
552                    return owner.getGuide().getConfiguration();
553            }
554            
555            public int getNumberOfInstances() throws MaltChainedException {
556                    if(numberOfInstances!=0)
557                            return numberOfInstances;
558                    else{
559                            //Do a line count of the instance file and return that
560                            
561                            BufferedReader reader = new BufferedReader( getInstanceInputStreamReader(".ins"));
562                            try {
563                                    while(reader.readLine()!=null){
564                                            numberOfInstances++;
565                                            owner.increaseFrequency();
566                                    }
567                                    
568                                    reader.close();
569                            } catch (IOException e) {
570                                    throw new MaltChainedException("No instances found in file",e);
571                            }
572                            
573                            
574                            
575                            return numberOfInstances;
576                            
577                    }
578            }
579    
580            public void increaseNumberOfInstances() {
581                    numberOfInstances++;
582                    owner.increaseFrequency();
583            }
584            
585            public void decreaseNumberOfInstances() {
586                    numberOfInstances--;
587                    owner.decreaseFrequency();
588            }
589            
590            protected void setNumberOfInstances(int numberOfInstances) {
591                    this.numberOfInstances = 0;
592            }
593    
594            protected void setLearningMethodName(String name) {
595                    this.name = name;
596            }
597            
598            protected OutputStreamWriter getInstanceOutputStreamWriter(String suffix) throws MaltChainedException {
599                    return getConfiguration().getConfigurationDir().getAppendOutputStreamWriter(owner.getModelName()+getLearningMethodName()+suffix);
600            }
601            
602            protected InputStreamReader getInstanceInputStreamReader(String suffix) throws MaltChainedException {
603                    return getConfiguration().getConfigurationDir().getInputStreamReader(owner.getModelName()+getLearningMethodName()+suffix);
604            }
605            
606            protected InputStreamReader getInstanceInputStreamReaderFromConfigFile(String suffix) throws MaltChainedException {
607                    return getConfiguration().getConfigurationDir().getInputStreamReaderFromConfigFile(owner.getModelName()+getLearningMethodName()+suffix);
608            }
609            
610            protected File getFile(String suffix) throws MaltChainedException {
611                    return getConfiguration().getConfigurationDir().getFile(owner.getModelName()+getLearningMethodName()+suffix);
612            }
613            
614            protected JarEntry getConfigFileEntry(String suffix) throws MaltChainedException {
615                    return getConfiguration().getConfigurationDir().getConfigFileEntry(owner.getModelName()+getLearningMethodName()+suffix);
616            }
617            /**
618             * Reads an instance file into a svm_problem object according to the Malt-SVM format, which is column fixed format (tab-separated).
619             * 
620             * @param isr   the instance stream reader for the instance file
621             * @param cardinalities a array containing the number of distinct values for a particular column.
622             * @throws LiblinearException
623             */
624            public Problem readLibLinearProblem(InputStreamReader isr, int[] cardinalities) throws MaltChainedException {
625                    Problem problem = new Problem();
626    
627    
628                    
629                    try {
630                            final BufferedReader fp = new BufferedReader(isr);
631                            int max_index = 0;
632                            if (xlist == null) {
633                                    xlist = new ArrayList<FeatureNode>(); 
634                            }
635                            problem.bias = getBias();
636                            problem.l = getNumberOfInstances();
637                            problem.x = new FeatureNode[problem.l][];
638                            problem.y = new int[problem.l];
639                            int i = 0;
640                            final Pattern tabPattern = Pattern.compile("\t");
641                            final Pattern pipePattern = Pattern.compile("\\|");
642                            while(true) {
643                                    String line = fp.readLine();
644    
645                                    if(line == null) break;
646                                    String[] columns = tabPattern.split(line);
647    
648                                    if (columns.length == 0) {
649                                            continue;
650                                    }
651                                    
652                                    int offset = 1; 
653                                    int j = 0;
654                                    try {
655                                            problem.y[i] = 
656                                                    Integer.parseInt(columns[j]);
657                                            int p = 0;
658                                            for(j = 1; j < columns.length; j++) {
659                                                    final String[] items = pipePattern.split(columns[j]);   
660                                                    for (int k = 0; k < items.length; k++) {
661                                                            try {
662                                                                    if (Integer.parseInt(items[k]) != -1) {
663                                                                            xlist.add(p, new FeatureNode(Integer.parseInt(items[k])+offset, 1));
664                                                                            p++;
665                                                                    }
666                                                            } catch (NumberFormatException e) {
667                                                                    throw new LiblinearException("The instance file contain a non-integer value '"+items[k]+"'", e);
668                                                            }
669                                                    }
670                                                    offset += cardinalities[j-1];
671                                            }
672                                            problem.x[i] = xlist.subList(0, p).toArray(new FeatureNode[0]);
673                                            if(columns.length > 1) {
674                                                    max_index = Math.max(max_index, problem.x[i][p-1].index);
675                                            }
676                                            i++;
677                                            xlist.clear();
678                                    } catch (ArrayIndexOutOfBoundsException e) {
679                                            throw new LiblinearException("Cannot read from the instance file. ", e);
680                                    }
681                            }
682                            fp.close();     
683                            problem.n = max_index;
684                            if ( problem.bias >= 0 ) {
685                                    problem.n++;
686                            }
687                            xlist = null;
688                    } catch (IOException e) {
689                            throw new LiblinearException("Cannot read from the instance file. ", e);
690                    }
691                    return problem;
692            }
693            
694            protected void initSpecialParameters() throws MaltChainedException {
695                    if (getConfiguration().getOptionValue("singlemalt", "null_value") != null && getConfiguration().getOptionValue("singlemalt", "null_value").toString().equalsIgnoreCase("none")) {
696                            excludeNullValues = true;
697                    } else {
698                            excludeNullValues = false;
699                    }
700                    saveInstanceFiles = ((Boolean)getConfiguration().getOptionValue("liblinear", "save_instance_files")).booleanValue();
701                            
702                    if (!getConfiguration().getOptionValue("liblinear", "liblinear_external").toString().equals("")) {
703                            try {
704                                    if (!new File(getConfiguration().getOptionValue("liblinear", "liblinear_external").toString()).exists()) {
705                                            throw new LiblinearException("The path to the external Liblinear trainer 'svm-train' is wrong.");
706                                    }
707                                    if (new File(getConfiguration().getOptionValue("liblinear", "liblinear_external").toString()).isDirectory()) {
708                                            throw new LiblinearException("The option --liblinear-liblinear_external points to a directory, the path should point at the 'train' file or the 'train.exe' file");
709                                    }
710                                    if (!(getConfiguration().getOptionValue("liblinear", "liblinear_external").toString().endsWith("train") || getConfiguration().getOptionValue("liblinear", "liblinear_external").toString().endsWith("train.exe"))) {
711                                            throw new LiblinearException("The option --liblinear-liblinear_external does not specify the path to 'train' file or the 'train.exe' file. ");
712                                    }
713                                    pathExternalLiblinearTrain = getConfiguration().getOptionValue("liblinear", "liblinear_external").toString();
714                            } catch (SecurityException e) {
715                                    throw new LiblinearException("Access denied to the file specified by the option --liblinear-liblinear_external. ", e);
716                            }
717                    }
718                    if (getConfiguration().getOptionValue("liblinear", "verbosity") != null) {
719                            verbosity = Verbostity.valueOf(getConfiguration().getOptionValue("liblinear", "verbosity").toString().toUpperCase());
720                    }
721            }
722            
723            public String getLibLinearOptions() {
724                    StringBuilder sb = new StringBuilder();
725                    for (String key : liblinearOptions.keySet()) {
726                            sb.append('-');
727                            sb.append(key);
728                            sb.append(' ');
729                            sb.append(liblinearOptions.get(key));
730                            sb.append(' ');
731                    }
732                    return sb.toString();
733            }
734            
735            public void parseParameters(String paramstring) throws MaltChainedException {
736                    if (paramstring == null) {
737                            return;
738                    }
739                    final String[] argv;
740                    String allowedFlags = "sceB";
741                    try {
742                            argv = paramstring.split("[_\\p{Blank}]");
743                    } catch (PatternSyntaxException e) {
744                            throw new LiblinearException("Could not split the liblinear-parameter string '"+paramstring+"'. ", e);
745                    }
746                    for (int i=0; i < argv.length-1; i++) {
747                            if(argv[i].charAt(0) != '-') {
748                                    throw new LiblinearException("The argument flag should start with the following character '-', not with "+argv[i].charAt(0));
749                            }
750                            if(++i>=argv.length) {
751                                    throw new LiblinearException("The last argument does not have any value. ");
752                            }
753                            try {
754                                    int index = allowedFlags.indexOf(argv[i-1].charAt(1));
755                                    if (index != -1) {
756                                            liblinearOptions.put(Character.toString(argv[i-1].charAt(1)), argv[i]);
757                                    } else {
758                                            throw new LiblinearException("Unknown liblinear parameter: '"+argv[i-1]+"' with value '"+argv[i]+"'. ");                
759                                    }
760                            } catch (ArrayIndexOutOfBoundsException e) {
761                                    throw new LiblinearException("The liblinear parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);
762                            } catch (NumberFormatException e) {
763                                    throw new LiblinearException("The liblinear parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);     
764                            } catch (NullPointerException e) {
765                                    throw new LiblinearException("The liblinear parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);     
766                            }
767                    }
768            }
769            
770            public double getBias() throws MaltChainedException {
771                    try {
772                            return Double.valueOf(liblinearOptions.get("B")).doubleValue();
773                    } catch (NumberFormatException e) {
774                            throw new LiblinearException("The liblinear bias value is not numerical value. ", e);
775                    }
776            }
777    
778            public Parameter getLiblinearParameters() throws MaltChainedException {
779                    Parameter param = new Parameter(SolverType.MCSVM_CS, 0.1, 0.1);
780                    String type = liblinearOptions.get("s");
781                    
782                    if (type.equals("0")) {
783                            param.setSolverType(SolverType.L2R_LR);
784                    } else if (type.equals("1")) {
785                            param.setSolverType(SolverType.L2R_L2LOSS_SVC_DUAL);
786                    } else if (type.equals("2")) {
787                            param.setSolverType(SolverType.L2R_L2LOSS_SVC);
788                    } else if (type.equals("3")) {
789                            param.setSolverType(SolverType.L2R_L1LOSS_SVC_DUAL);
790                    } else if (type.equals("4")) {
791                            param.setSolverType(SolverType.MCSVM_CS);
792                    } else if (type.equals("5")) {
793                            param.setSolverType(SolverType.L1R_L2LOSS_SVC); 
794                    } else if (type.equals("6")) {
795                            param.setSolverType(SolverType.L1R_LR); 
796                    } else {
797                            throw new LiblinearException("The liblinear type (-s) is not an integer value between 0 and 4. ");
798                    }
799                    try {
800                            param.setC(Double.valueOf(liblinearOptions.get("c")).doubleValue());
801                    } catch (NumberFormatException e) {
802                            throw new LiblinearException("The liblinear cost (-c) value is not numerical value. ", e);
803                    }
804                    try {
805                            param.setEps(Double.valueOf(liblinearOptions.get("e")).doubleValue());
806                    } catch (NumberFormatException e) {
807                            throw new LiblinearException("The liblinear epsilon (-e) value is not numerical value. ", e);
808                    }
809                    return param;
810            }
811    
812            public void initLiblinearOptions() {
813                    liblinearOptions.put("s", "4"); // type = SolverType.L2LOSS_SVM_DUAL (default)
814                    liblinearOptions.put("c", "0.1"); // cost = 1 (default)
815                    liblinearOptions.put("e", "0.1"); // epsilon = 0.1 (default)
816                    liblinearOptions.put("B", "1"); // bias = 1 (default)
817            }
818    
819            public String[] getLibLinearParamStringArray() {
820                    final ArrayList<String> params = new ArrayList<String>();
821    
822                    for (String key : liblinearOptions.keySet()) {
823                            params.add("-"+key); params.add(liblinearOptions.get(key));
824                    }
825                    return params.toArray(new String[params.size()]);
826            }
827            
828            
829            public void liblinear_predict_with_kbestlist(Model model, FeatureNode[] x, KBestList kBestList) throws MaltChainedException {
830                    int i;
831                    final int nr_class = model.getNrClass();
832                    final double[] dec_values = new double[nr_class];
833    
834                    Linear.predictValues(model, x, dec_values);
835                    final int[] labels = model.getLabels();
836                    int[] predictionList = new int[nr_class];
837                    for(i=0;i<nr_class;i++) {
838                            predictionList[i] = labels[i];
839                    }
840    
841                    double tmpDec;
842                    int tmpObj;
843                    int lagest;
844                    for (i=0;i<nr_class-1;i++) {
845                            lagest = i;
846                            for (int j=i;j<nr_class;j++) {
847                                    if (dec_values[j] > dec_values[lagest]) {
848                                            lagest = j;
849                                    }
850                            }
851                            tmpDec = dec_values[lagest];
852                            dec_values[lagest] = dec_values[i];
853                            dec_values[i] = tmpDec;
854                            tmpObj = predictionList[lagest];
855                            predictionList[lagest] = predictionList[i];
856                            predictionList[i] = tmpObj;
857                    }
858                    
859                    int k = nr_class-1;
860                    if (kBestList.getK() != -1) {
861                            k = kBestList.getK() - 1;
862                    }
863                    
864                    for (i=0; i<nr_class && k >= 0; i++, k--) {
865                            if (kBestList instanceof ScoredKBestList) {
866                                    ((ScoredKBestList)kBestList).add(predictionList[i], (float)dec_values[i]);
867                            } else {
868                                    kBestList.add(predictionList[i]);
869                            }
870    
871                    }
872            }
873            
874            /**
875             * Converts the instance file (Malt's own SVM format) into the Liblinear (SVMLight) format. The input instance file is removed (replaced)
876             * by the instance file in the Liblinear (SVMLight) format. If a column contains -1, the value will be removed in destination file. 
877             * 
878             * @param isr the input stream reader for the source instance file
879             * @param osw   the output stream writer for the destination instance file
880             * @param cardinalities a vector containing the number of distinct values for a particular column
881             * @throws LiblinearException
882             */
883            public static void maltSVMFormat2OriginalSVMFormat(InputStreamReader isr, OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException {
884                    try {
885                            final BufferedReader in = new BufferedReader(isr);
886                            final BufferedWriter out = new BufferedWriter(osw);
887    
888                            int c;
889                            int j = 0;
890                            int offset = 1;
891                            int code = 0;
892                            while(true) {
893                                    c = in.read();
894                                    if (c == -1) {
895                                            break;
896                                    }
897                                    
898                                    if (c == '\t' || c == '|') {
899                                            if (j == 0) {
900                                                    out.write(Integer.toString(code));
901                                                    j++;
902                                            } else {
903                                                    if (code != -1) {
904                                                            out.write(' ');
905                                                            out.write(Integer.toString(code+offset));
906                                                            out.write(":1");
907                                                    }
908                                                    if (c == '\t') {
909                                                            offset += cardinalities[j-1];
910                                                            j++;
911                                                    }
912                                            }
913                                            code = 0;
914                                    } else if (c == '\n') {
915                                            j = 0;
916                                            offset = 1;
917                                            out.write('\n');
918                                            code = 0;
919                                    } else if (c == '-') {
920                                            code = -1;
921                                    } else if (code != -1) {
922                                            if (c > 47 && c < 58) {
923                                                    code = code * 10 + (c-48);
924                                            } else {
925                                                    throw new LiblinearException("The instance file contain a non-integer value, when converting the Malt SVM format into Liblinear format.");
926                                            }
927                                    }       
928                            }                       
929                            in.close();     
930                            out.close();
931                    } catch (IOException e) {
932                            throw new LiblinearException("Cannot read from the instance file, when converting the Malt SVM format into Liblinear format. ", e);
933                    }
934            }
935            
936            protected void finalize() throws Throwable {
937                    try {
938                            closeInstanceWriter();
939                    } finally {
940                            super.finalize();
941                    }
942            }
943            
944            /* (non-Javadoc)
945             * @see java.lang.Object#toString()
946             */
947            public String toString() {
948                    final StringBuffer sb = new StringBuffer();
949                    sb.append("\nLiblinear INTERFACE\n");
950                    sb.append("  Liblinear version: "+LIBLINEAR_VERSION+"\n");
951                    sb.append("  Liblinear string: "+paramString+"\n");
952                    
953                    sb.append(getLibLinearOptions());
954                    return sb.toString();
955            }
956    
957    
958            @Override
959            public void divideByFeatureSet(
960                            Set<Integer> featureIdsToCreateSeparateBranchesForSet, ArrayList<Integer> divideFeatureIndexVector, String otherId)  throws MaltChainedException {
961    
962                    
963                    //Create a hash map that maps every feature id to a writer
964                    HashMap<Integer, BufferedWriter>   featureIdToWriterMap = new HashMap<Integer, BufferedWriter>();
965                    
966                    for(int element:featureIdsToCreateSeparateBranchesForSet){
967                     
968    
969                            BufferedWriter outputWriter = new BufferedWriter(getConfiguration().getConfigurationDir().getOutputStreamWriter(owner.getModelName().replace('.','_') + element + "." + getLearningMethodName()+".ins"));
970                            featureIdToWriterMap.put(element, outputWriter);
971                    
972                    }
973                    
974                    BufferedWriter otherOutputWriter = new BufferedWriter(getConfiguration().getConfigurationDir().getOutputStreamWriter(owner.getModelName().replace('.','_') + otherId + "." + getLearningMethodName()+".ins"));
975    
976                    
977                    try {
978                            final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins"));
979                            //every line will be written to a separate file
980                            String line = in.readLine();
981                            final Pattern tabPattern = Pattern.compile("\t");
982                            while(line!=null){
983                                    
984                                    //Find out which pot the line shall be put in
985                                    String[] lineArray = tabPattern.split(line);
986                                    
987                                    int id = new Integer(lineArray[divideFeatureIndexVector.get(0)+1]);
988                                    
989                                    if(!featureIdToWriterMap.containsKey(id)){
990                                            otherOutputWriter.write(line + "\n");
991                                    }else    
992                                            featureIdToWriterMap.get(id).write(getLineToWrite(lineArray,divideFeatureIndexVector.get(0)+1));
993                                    
994                                    line = in.readLine();
995                            }
996                            
997                            otherOutputWriter.close();
998                            
999                            in.close();
1000                            
1001                            for(BufferedWriter writer: featureIdToWriterMap.values())
1002                                    writer.close();
1003    
1004                    } catch (SecurityException e) {
1005                            throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
1006                    } catch (NullPointerException  e) {
1007                            throw new LiblinearException("The instance file cannot be found. ", e);
1008                    } catch (FileNotFoundException e) {
1009                            throw new LiblinearException("The instance file cannot be found. ", e);
1010                    } catch (IOException e) {
1011                            throw new LiblinearException("The Liblinear learner read from the instance file. ", e);
1012                    }
1013    
1014                    
1015    
1016            }
1017    
1018    
1019            private String getLineToWrite(String[] lineArray, int excludeIndex) {
1020                    StringBuffer buf = new StringBuffer();
1021                    
1022                    for(int n = 0; n < lineArray.length; n++)
1023                            if(n != excludeIndex)
1024                                    buf.append(lineArray[n] + "\t");
1025                    buf.append("\n");               
1026                    return buf.toString();
1027            }
1028    
1029    
1030            @Override
1031            public Map<Integer, Integer> createFeatureIdToCountMap(
1032                            ArrayList<Integer> divideFeatureIndexVector) throws MaltChainedException{
1033    
1034                    HashMap<Integer, Integer> featureIdToCountMap = new HashMap<Integer, Integer>();
1035                    
1036                    //Go trough the file and count all feature ids in the given column(s)
1037                    
1038                    try {
1039                            final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins"));
1040                            //every line will be written to a separate file
1041                            String line = in.readLine();
1042                            final Pattern tabPattern = Pattern.compile("\t");
1043                            while(line!=null){
1044                                    
1045                                    //Find out which pot the line shall be put in
1046                                    String[] lineArray = tabPattern.split(line);
1047                                    
1048                                    for(int n = 0; n < divideFeatureIndexVector.size(); n++){
1049                                            int id = new Integer(lineArray[divideFeatureIndexVector.get(n)+1]);
1050                                            
1051                                            
1052                                            if (!featureIdToCountMap.containsKey(id)) {
1053    
1054                                                    featureIdToCountMap.put(id, 0);
1055    
1056                                            }
1057    
1058                                            int previousCount = featureIdToCountMap.get(id);
1059                                            
1060                                            featureIdToCountMap.put(id, previousCount + 1);
1061                                            
1062                                    }                               
1063                                    
1064                                    line = in.readLine();
1065                            }
1066                            in.close();
1067                    } catch (SecurityException e) {
1068                            throw new LiblinearException("The Libsvm learner cannot remove the instance file. ", e);
1069                    } catch (NullPointerException  e) {
1070                            throw new LiblinearException("The instance file cannot be found. ", e);
1071                    } catch (FileNotFoundException e) {
1072                            throw new LiblinearException("The instance file cannot be found. ", e);
1073                    } catch (IOException e) {
1074                            throw new LiblinearException("The Liblinear learner read from the instance file. ", e);
1075                    }
1076                    
1077                    return featureIdToCountMap;
1078            }
1079    
1080    }