001    package org.maltparser.ml.libsvm;
002    
003    import java.io.BufferedReader;
004    import java.io.BufferedWriter;
005    import java.io.File;
006    import java.io.FileNotFoundException;
007    import java.io.IOException;
008    import java.io.InputStream;
009    import java.io.InputStreamReader;
010    import java.io.OutputStreamWriter;
011    import java.io.PrintStream;
012    import java.text.DecimalFormat;
013    import java.text.DecimalFormatSymbols;
014    import java.util.ArrayList;
015    import java.util.HashMap;
016    import java.util.Map;
017    import java.util.Set;
018    import java.util.jar.JarEntry;
019    import java.util.regex.Pattern;
020    import java.util.regex.PatternSyntaxException;
021    
022    import libsvm.svm;
023    import libsvm.svm_model;
024    import libsvm.svm_node;
025    import libsvm.svm_parameter;
026    import libsvm.svm_problem;
027    
028    import org.maltparser.core.exception.MaltChainedException;
029    import org.maltparser.core.feature.FeatureVector;
030    import org.maltparser.core.feature.function.FeatureFunction;
031    import org.maltparser.core.feature.value.FeatureValue;
032    import org.maltparser.core.feature.value.MultipleFeatureValue;
033    import org.maltparser.core.feature.value.SingleFeatureValue;
034    import org.maltparser.core.helper.NoPrintStream;
035    import org.maltparser.core.syntaxgraph.DependencyStructure;
036    import org.maltparser.ml.LearningMethod;
037    import org.maltparser.ml.liblinear.LiblinearException;
038    import org.maltparser.ml.libsvm.LibsvmException;
039    import org.maltparser.parser.DependencyParserConfig;
040    import org.maltparser.parser.guide.instance.InstanceModel;
041    import org.maltparser.parser.history.action.SingleDecision;
042    import org.maltparser.parser.history.kbest.KBestList;
043    import org.maltparser.parser.history.kbest.ScoredKBestList;
044    
045    /**
Implements an interface to the LIBSVM learner (currently LIBSVM 2.91 is used). More information
about LIBSVM can be found at
048    <a href="http://www.csie.ntu.edu.tw/~cjlin/libsvm/" target="_blank">LIBSVM -- A Library for Support Vector Machines</a>.
049    
050    @author Johan Hall
051    @since 1.0
052    */
053    public class Libsvm implements LearningMethod {
054            public final static String LIBSVM_VERSION = "2.91";
055            public enum Verbostity {
056                    SILENT, ERROR, ALL
057            }
058            protected InstanceModel owner;
059            protected int learnerMode;
060            protected String name;
061            protected int numberOfInstances;
062            protected boolean saveInstanceFiles;
063            protected boolean excludeNullValues;
064            protected String pathExternalSVMTrain = null;
065            private int[] cardinalities;
066    
067            /**
068             * Instance output stream writer 
069             */
070            private BufferedWriter instanceOutput = null; 
071            /**
072             * LIBSVM svm_model object, only used during classification.
073             */
074            private svm_model model = null;
075            
076            /**
077             * LIBSVM svm_parameter object
078             */
079            private svm_parameter svmParam;
080            /**
081             * Parameter string
082             */
083            private String paramString;
084            /**
085             * An array of LIBSVM svm_node objects, only used during classification.
086             */
087            private ArrayList<svm_node> xlist = null;
088    
089            private Verbostity verbosity;
090            /**
091             * Constructs a LIBSVM learner.
092             * 
093             * @param owner the guide model owner
094             * @param learnerMode the mode of the learner TRAIN or CLASSIFY
095             */
096            public Libsvm(InstanceModel owner, Integer learnerMode) throws MaltChainedException {
097                    setOwner(owner);
098                    setLearningMethodName("libsvm");
099                    setLearnerMode(learnerMode.intValue());
100                    setNumberOfInstances(0);
101                    verbosity = Verbostity.SILENT;
102                    initSvmParam(getConfiguration().getOptionValue("libsvm", "libsvm_options").toString());
103                    initSpecialParameters();
104                    if (learnerMode == BATCH) {
105    //                      if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) {
106    //                              if (pathExternalSVMTrain != null) {
107    //                                      owner.getGuide().getConfiguration().getConfigLogger().info("  Learner              : LIBSVM external "+ getParamString() + "\n");
108    //                              } else {
109    //                                      owner.getGuide().getConfiguration().getConfigLogger().info("  Learner              : LIBSVM "+LIBSVM_VERSION+" "+ getParamString() + "\n");
110    //                              }
111    //                      }
112                            instanceOutput = new BufferedWriter(getInstanceOutputStreamWriter(".ins"));
113                    } 
114    //              else {
115    //                      if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) {
116    //                              owner.getGuide().getConfiguration().getConfigLogger().info("  Classifier           : LIBSVM "+LIBSVM_VERSION+" "+ getParamString()+ "\n");
117    //                      }
118    //              }
119            }
120            
121            
122            public void addInstance(SingleDecision decision, FeatureVector featureVector) throws MaltChainedException {
123                    if (featureVector == null) {
124                            throw new LibsvmException("The feature vector cannot be found");
125                    } else if (decision == null) {
126                            throw new LibsvmException("The decision cannot be found");
127                    }       
128                    try {
129                            instanceOutput.write(decision.getDecisionCode()+"\t");
130                            for (int i = 0; i < featureVector.size(); i++) {
131                                    FeatureValue featureValue = featureVector.get(i).getFeatureValue();
132                                    if (excludeNullValues == true && featureValue.isNullValue()) {
133                                            instanceOutput.write("-1");
134                                    } else {
135                                            if (featureValue instanceof SingleFeatureValue) {
136                                                    instanceOutput.write(((SingleFeatureValue)featureValue).getCode()+"");
137                                            } else if (featureValue instanceof MultipleFeatureValue) {
138                                                    Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes();
139                                                    int j=0;
140                                                    for (Integer value : values) {
141                                                            instanceOutput.write(value.toString());
142                                                            if (j != values.size()-1) {
143                                                                    instanceOutput.write("|");
144                                                            }
145                                                            j++;
146                                                    }
147                                            }
148                                    }
149                                    if (i != featureVector.size()) {
150                                            instanceOutput.write('\t');
151                                    }
152                            }
153    
154                            instanceOutput.write('\n');
155                            instanceOutput.flush();
156                            increaseNumberOfInstances();
157                    } catch (IOException e) {
158                            throw new LibsvmException("The LIBSVM learner cannot write to the instance file. ", e);
159                    }
160            }
161            
162            public void finalizeSentence(DependencyStructure dependencyGraph) throws MaltChainedException { }
163            
164            /* (non-Javadoc)
165             * @see org.maltparser.ml.LearningMethod#noMoreInstances()
166             */
167            public void noMoreInstances() throws MaltChainedException {
168                    closeInstanceWriter();
169            }
170    
171    
172            /* (non-Javadoc)
173             * @see org.maltparser.ml.LearningMethod#train(org.maltparser.parser.guide.feature.FeatureVector)
174             */
175            public void train(FeatureVector featureVector) throws MaltChainedException {
176                    if (featureVector == null) {
177                            throw new LibsvmException("The feature vector cannot be found. ");
178                    } else if (owner == null) {
179                            throw new LibsvmException("The parent guide model cannot be found. ");
180                    }
181                    cardinalities = getCardinalities(featureVector);
182                    if (pathExternalSVMTrain == null) {
183                            try {
184                                    final svm_problem prob = readProblemMaltSVMFormat(getInstanceInputStreamReader(".ins"), cardinalities, svmParam);
185                                    if(svm.svm_check_parameter(prob, svmParam) != null) {
186                                            throw new LibsvmException(svm.svm_check_parameter(prob, svmParam));
187                                    }
188                                    owner.getGuide().getConfiguration().getConfigLogger().info("Creating LIBSVM model "+getFile(".mod").getName()+"\n");
189                                    final PrintStream out = System.out;
190                                    final PrintStream err = System.err;
191                                    System.setOut(NoPrintStream.NO_PRINTSTREAM);
192                                    System.setErr(NoPrintStream.NO_PRINTSTREAM);
193                                    
194                                    svm.svm_save_model(getFile(".mod").getAbsolutePath(), svm.svm_train(prob, svmParam));
195                                    System.setOut(err);
196                                    System.setOut(out);
197                                    if (!saveInstanceFiles) {
198                                            getFile(".ins").delete();
199                                    }
200                            } catch (OutOfMemoryError e) {
201                                    throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
202                            } catch (IllegalArgumentException e) {
203                                    throw new LibsvmException("The LIBSVM learner was not able to redirect Standard Error stream. ", e);
204                            } catch (SecurityException e) {
205                                    throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e);
206                            } catch (IOException e) {
207                                    throw new LibsvmException("The LIBSVM learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
208                            }
209                    } else {
210                            trainExternal(featureVector);
211                    }
212                    saveCardinalities(getInstanceOutputStreamWriter(".car"), cardinalities);
213            }
214            
215            
216            @Override
217            public double crossValidate(FeatureVector featureVector, int nrOfSplits)
218                            throws MaltChainedException {
219                    if (featureVector == null) {
220                            throw new LibsvmException("The feature vector cannot be found. ");
221                    } else if (owner == null) {
222                            throw new LibsvmException("The parent guide model cannot be found. ");
223                    }
224                    cardinalities = getCardinalities(featureVector);
225                    //TODO Implement support for externial SVM for cross validation  
226                    //if (pathExternalSVMTrain == null) {
227                    
228                    double crossValidationAccuracy = 0.0;
229                    
230                            try {
231                                    final svm_problem prob = readProblemMaltSVMFormat(getInstanceInputStreamReader(".ins"), cardinalities, svmParam);
232                                    if(svm.svm_check_parameter(prob, svmParam) != null) {
233                                            throw new LibsvmException(svm.svm_check_parameter(prob, svmParam));
234                                    }
235                                    owner.getGuide().getConfiguration().getConfigLogger().info("Doing cross validation\n");
236                                    final PrintStream out = System.out;
237                                    final PrintStream err = System.err;
238                                    System.setOut(NoPrintStream.NO_PRINTSTREAM);
239                                    System.setErr(NoPrintStream.NO_PRINTSTREAM);
240                                    
241                                    //svm.svm_save_model(getFile(".mod").getAbsolutePath(), svm.svm_train(prob, svmParam));
242                                    
243                                    double[] target = new double[prob.l];
244                                    
245                                    svm.svm_cross_validation(prob, svmParam, nrOfSplits, target);                           
246                                    
247                                    System.setOut(err);
248                                    System.setOut(out);
249                                    if (!saveInstanceFiles) {
250                                            getFile(".ins").delete();
251                                    }
252                                    
253                                    
254                                    double total_correct = 0.0;
255                                    
256                                    for(int i=0;i<prob.l;i++)
257                                            if(target[i] == prob.y[i])
258                                                    ++total_correct;
259                                    
260                                    if(total_correct>0)
261                                            crossValidationAccuracy = 100.0*total_correct/prob.l;
262                                    
263                                    
264                            } catch (OutOfMemoryError e) {
265                                    throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
266                            } catch (IllegalArgumentException e) {
267                                    throw new LibsvmException("The LIBSVM learner was not able to redirect Standard Error stream. ", e);
268                            } catch (SecurityException e) {
269                                    throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e);
270                            }
271                    //} else {
272                    //      trainExternal(featureVector);
273                    //}
274                    //saveCardinalities(getInstanceOutputStreamWriter(".car"), cardinalities);
275                    
276    
277                            
278                    return crossValidationAccuracy;
279            }
280            
281            
282            private void trainExternal(FeatureVector featureVector) throws MaltChainedException {
283                    try {           
284                            maltSVMFormat2OriginalSVMFormat(getInstanceInputStreamReader(".ins"), getInstanceOutputStreamWriter(".ins.tmp"), cardinalities);
285                            owner.getGuide().getConfiguration().getConfigLogger().info("Creating LIBSVM model (svm-train) "+getFile(".mod").getName());
286    
287                            final ArrayList<String> commands = new ArrayList<String>();
288                            commands.add(pathExternalSVMTrain);
289                            final String[] params = getSVMParamStringArray(svmParam);
290                            for (int i=0; i < params.length; i++) {
291                                    commands.add(params[i]);
292                            }
293                            commands.add(getFile(".ins.tmp").getAbsolutePath());
294                            commands.add(getFile(".mod").getAbsolutePath());
295                            String[] arrayCommands =  commands.toArray(new String[commands.size()]);
296                            
297                    if (verbosity == Verbostity.ALL) {
298                            owner.getGuide().getConfiguration().getConfigLogger().info('\n');
299                    }
300                            final Process child = Runtime.getRuntime().exec(arrayCommands);
301                    final InputStream in = child.getInputStream();
302                    final InputStream err = child.getErrorStream();
303                    int c;
304                    while ((c = in.read()) != -1){
305                            if (verbosity == Verbostity.ALL) {
306                                    owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
307                            }
308                    }
309                    while ((c = err.read()) != -1){
310                            if (verbosity == Verbostity.ALL || verbosity == Verbostity.ERROR) {
311                                    owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
312                            }
313                    }
314                if (child.waitFor() != 0) {
315                    owner.getGuide().getConfiguration().getConfigLogger().info(" FAILED ("+child.exitValue()+")");
316                }
317                    in.close();
318                    err.close();
319                    if (!saveInstanceFiles) {
320                                    getFile(".ins").delete();
321                                    getFile(".ins.tmp").delete();
322                    }
323                    owner.getGuide().getConfiguration().getConfigLogger().info('\n');
324                    } catch (InterruptedException e) {
325                             throw new LibsvmException("SVM-trainer is interrupted. ", e);
326                    } catch (IllegalArgumentException e) {
327                            throw new LibsvmException("The LIBSVM learner was not able to redirect Standard Error stream. ", e);
328                    } catch (SecurityException e) {
329                            throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e);
330                    } catch (IOException e) {
331                            throw new LibsvmException("The LIBSVM learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
332                    } catch (OutOfMemoryError e) {
333                            throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
334                    }
335            }
336            
337            private int[] getCardinalities(FeatureVector featureVector) {
338                    int[] cardinalities = new int[featureVector.size()];
339                    int i = 0;
340                    for (FeatureFunction feature : featureVector) {
341                            cardinalities[i++] = feature.getFeatureValue().getCardinality();
342                    }
343                    return cardinalities;
344            }
345            
346            private void saveCardinalities(OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException {
347                    final BufferedWriter out = new BufferedWriter(osw);
348                    try {
349                            for (int i = 0, n = cardinalities.length; i < n; i++) {
350                                    out.write(Integer.toString(cardinalities[i]));
351                                    if (i < n - 1) {
352                                            out.write(',');
353                                    }
354                            }
355                            out.write('\n');
356                            out.close();
357                    } catch (IOException e) {
358                            throw new LibsvmException("Couldn't save the cardinalities to file. ", e);
359                    }
360            }
361            
362            private int[] loadCardinalities(InputStreamReader isr) throws MaltChainedException {
363                    int[] cardinalities = null;
364                    try {
365                            final BufferedReader in = new BufferedReader(isr); 
366                            String line;
367                            if ((line = in.readLine()) != null) {
368                                    String[] items = line.split(",");
369                                    cardinalities = new int[items.length];
370                                    for (int i = 0; i < items.length; i++) {
371                                            cardinalities[i] = Integer.parseInt(items[i]);
372                                    }
373                            }
374                            in.close();
375                    } catch (IOException e) {
376                            throw new LibsvmException("The cardinalities cannot be read because wrongly formatted. ", e);
377                    } catch (NumberFormatException e) {
378                            throw new LibsvmException("Couldn't load the cardinalities from file. ", e);
379                    }
380                    return cardinalities;
381            }
382            
383            /* (non-Javadoc)
384             * @see org.maltparser.ml.LearningMethod#moveAllInstances(org.maltparser.ml.LearningMethod, org.maltparser.core.feature.function.FeatureFunction, java.util.ArrayList)
385             */
386            public void moveAllInstances(LearningMethod method, FeatureFunction divideFeature, ArrayList<Integer> divideFeatureIndexVector) throws MaltChainedException {
387                    if (method == null) {
388                            throw new LibsvmException("The learning method cannot be found. ");
389                    } else if (divideFeature == null) {
390                            throw new LibsvmException("The divide feature cannot be found. ");
391                    } 
392                    try {
393                            final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins"));
394                            final BufferedWriter out = method.getInstanceWriter();
395                            final StringBuilder sb = new StringBuilder(6);
396                            int l = in.read();
397                            char c;
398                            int j = 0;
399                            while(true) {
400                                    if (l == -1) {
401                                            sb.setLength(0);
402                                            break;
403                                    }
404                                    
405                                    c = (char)l; 
406                                    l = in.read();
407                                    if (c == '\t') {
408                                            if (divideFeatureIndexVector.contains(j-1)) {
409                                                    out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode()));
410                                                    out.write('\t');
411                                            }
412                                            out.write(sb.toString());
413                                            j++;
414                                            out.write('\t');
415                                            sb.setLength(0);
416                                    } else if (c == '\n') {
417                                            if (sb.length() > 0) { 
418                                                    out.write(sb.toString());
419                                            }
420                                            if (divideFeatureIndexVector.contains(j-1)) {
421                                                    if (sb.length() > 0) { 
422                                                            out.write('\t');
423                                                    }
424                                                    out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode()));
425                                            }
426                                            out.write('\n');
427                                            sb.setLength(0);
428                                            method.increaseNumberOfInstances();
429                                            this.decreaseNumberOfInstances();
430                                            j = 0;
431                                    } else {
432                                            sb.append(c);
433                                    }
434                            }       
435                            in.close();
436                            getFile(".ins").delete();
437                    } catch (SecurityException e) {
438                            throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e);
439                    } catch (NullPointerException  e) {
440                            throw new LibsvmException("The instance file cannot be found. ", e);
441                    } catch (FileNotFoundException e) {
442                            throw new LibsvmException("The instance file cannot be found. ", e);
443                    } catch (IOException e) {
444                            throw new LibsvmException("The LIBSVM learner read from the instance file. ", e);
445                    }
446            }
447            
448            /* (non-Javadoc)
449             * @see org.maltparser.ml.LearningMethod#predict(org.maltparser.parser.guide.feature.FeatureVector, org.maltparser.ml.KBestList)
450             */
451            public boolean predict(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException {
452                    if (model == null) {
453                            try {
454                                    model = svm.svm_load_model(new BufferedReader(getInstanceInputStreamReaderFromConfigFile(".mod")));
455                            } catch (IOException e) {
456                                    throw new LibsvmException("The model cannot be loaded. ", e);
457                            }
458                    }
459                    if (cardinalities == null) {
460                            if (getConfigFileEntry(".car") != null) {
461                                    cardinalities = loadCardinalities(getInstanceInputStreamReaderFromConfigFile(".car"));
462                            } else {
463                                    cardinalities = getCardinalities(featureVector);
464                            }
465                    }
466                    if (xlist == null) {
467                            xlist = new ArrayList<svm_node>(featureVector.size()); 
468                    }
469                    if (model == null) { 
470                            throw new LibsvmException("The LIBSVM learner cannot predict the next class, because the learning model cannot be found. ");
471                    } else if (featureVector == null) {
472                            throw new LibsvmException("The LIBSVM learner cannot predict the next class, because the feature vector cannot be found. ");
473                    }
474                    int j = 0;
475                    int offset = 0;
476                    int i = 0;
477                    for (FeatureFunction feature : featureVector) {
478                            final FeatureValue featureValue = feature.getFeatureValue();
479                            if (!(excludeNullValues == true && featureValue.isNullValue())) {
480                                    if (featureValue instanceof SingleFeatureValue) {
481                                            if (((SingleFeatureValue)featureValue).getCode() < cardinalities[i]) {
482                                                    if (j >= xlist.size()) {
483                                                            svm_node x =  new svm_node();
484                                                            x.value = 1;
485                                                            xlist.add(j,x);
486                                                    }
487                                                    xlist.get(j++).index = ((SingleFeatureValue)featureValue).getCode() + offset;
488                                            }
489                                    } else if (featureValue instanceof MultipleFeatureValue) {
490                                            for (Integer value : ((MultipleFeatureValue)featureValue).getCodes()) {
491                                                    if (value < cardinalities[i]) {
492    //                                              if (((MultipleFeatureValue)featureValue).isKnown(value)) {
493                                                            if (j >= xlist.size()) {
494                                                                    svm_node x =  new svm_node();
495                                                                    x.value = 1;
496                                                                    xlist.add(j,x);
497                                                            }
498                                                            xlist.get(j++).index = value + offset;
499                                                    }
500                                            }
501                                    }
502                            }
503                            offset += cardinalities[i];
504                            i++;
505                    }
506    
507                    svm_node[] xarray = new svm_node[j];
508                    for (int k = 0; k < j; k++) {
509                            xarray[k] = xlist.get(k);
510                    }
511                    try {
512                            if (decision.getKBestList().getK() == 1 || svm.svm_get_svm_type(model) == svm_parameter.ONE_CLASS ||
513                                            svm.svm_get_svm_type(model) == svm_parameter.EPSILON_SVR ||
514                                            svm.svm_get_svm_type(model) == svm_parameter.NU_SVR) {
515                                    decision.getKBestList().add((int)svm.svm_predict(model, xarray));
516                            } else {
517                                    svm_predict_with_kbestlist(model, xarray, decision.getKBestList());
518                            }
519    
520                    } catch (OutOfMemoryError e) {
521                                    throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
522                    }
523    
524                    return true;
525            }
526            
527    
528            public void terminate() throws MaltChainedException { 
529                    closeInstanceWriter();
530                    model = null;
531                    svmParam = null;
532                    xlist = null;
533                    owner = null;
534            }
535    
536            public BufferedWriter getInstanceWriter() {
537                    return instanceOutput;
538            }
539            
540            protected void closeInstanceWriter() throws MaltChainedException {
541                    try {
542                            if (instanceOutput != null) {
543                                    instanceOutput.flush();
544                                    instanceOutput.close();
545                                    instanceOutput = null;
546                            }
547                    } catch (IOException e) {
548                            throw new LibsvmException("The LIBSVM learner cannot close the instance file. ", e);
549                    }
550            }
551            
552            /**
553             * Initialize the LIBSVM according to the parameter string
554             * 
555             * @param paramString the parameter string to configure the LIBSVM learner.
556             * @throws MaltChainedException
557             */
558            protected void initSvmParam(String paramString) throws MaltChainedException {
559                    this.paramString = paramString;
560                    svmParam = new svm_parameter();
561                    initParameters(svmParam);
562                    parseParameters(paramString, svmParam);
563            }
564            
565            /**
566             * Returns the parameter string for used for configure LIBSVM
567             * 
568             * @return the parameter string for used for configure LIBSVM
569             */
570            public String getParamString() {
571                    return paramString;
572            }
573            
574            public InstanceModel getOwner() {
575                    return owner;
576            }
577    
578            protected void setOwner(InstanceModel owner) {
579                    this.owner = owner;
580            }
581            
582            public int getLearnerMode() {
583                    return learnerMode;
584            }
585    
586            public void setLearnerMode(int learnerMode) throws MaltChainedException {
587                    this.learnerMode = learnerMode;
588            }
589            
590            public String getLearningMethodName() {
591                    return name;
592            }
593            
594            /**
595             * Returns the current configuration
596             * 
597             * @return the current configuration
598             * @throws MaltChainedException
599             */
600            public DependencyParserConfig getConfiguration() throws MaltChainedException {
601                    return owner.getGuide().getConfiguration();
602            }
603            
604            public int getNumberOfInstances() throws MaltChainedException {
605                    if(numberOfInstances!=0)
606                            return numberOfInstances;
607                    else{
608                            //Do a line count of the instance file and return that
609                            
610                            BufferedReader reader = new BufferedReader( getInstanceInputStreamReader(".ins"));
611                            try {
612                                    while(reader.readLine()!=null){
613                                            numberOfInstances++;
614                                            owner.increaseFrequency();
615                                    }
616                                    
617                                    reader.close();
618                            } catch (IOException e) {
619                                    throw new MaltChainedException("No instances found in file",e);
620                            }
621                            
622                            
623                            
624                            return numberOfInstances;
625                            
626                    }
627            }
628    
629            public void increaseNumberOfInstances() {
630                    numberOfInstances++;
631                    owner.increaseFrequency();
632            }
633            
634            public void decreaseNumberOfInstances() {
635                    numberOfInstances--;
636                    owner.decreaseFrequency();
637            }
638            
639            protected void setNumberOfInstances(int numberOfInstances) {
640                    this.numberOfInstances = 0;
641            }
642    
643            protected void setLearningMethodName(String name) {
644                    this.name = name;
645            }
646            
647            protected OutputStreamWriter getInstanceOutputStreamWriter(String suffix) throws MaltChainedException {
648                    return getConfiguration().getConfigurationDir().getAppendOutputStreamWriter(owner.getModelName()+getLearningMethodName()+suffix);
649            }
650            
651            protected InputStreamReader getInstanceInputStreamReader(String suffix) throws MaltChainedException {
652                    return getConfiguration().getConfigurationDir().getInputStreamReader(owner.getModelName()+getLearningMethodName()+suffix);
653            }
654            
655            protected InputStreamReader getInstanceInputStreamReaderFromConfigFile(String suffix) throws MaltChainedException {
656                    return getConfiguration().getConfigurationDir().getInputStreamReaderFromConfigFile(owner.getModelName()+getLearningMethodName()+suffix);
657            }
658            
659            protected File getFile(String suffix) throws MaltChainedException {
660                    return getConfiguration().getConfigurationDir().getFile(owner.getModelName()+getLearningMethodName()+suffix);
661            }
662            
663            protected JarEntry getConfigFileEntry(String suffix) throws MaltChainedException {
664                    return getConfiguration().getConfigurationDir().getConfigFileEntry(owner.getModelName()+getLearningMethodName()+suffix);
665            }
666            
667            /**
668             * Reads an instance file into a svm_problem object according to the Malt-SVM format, which is column fixed format (tab-separated).
669             * 
670             * @param isr   the instance stream reader for the instance file
671             * @param cardinalities a array containing the number of distinct values for a particular column.
672             * @param param a svm_parameter object
673             * @throws LibsvmException
674             */
675            public final svm_problem readProblemMaltSVMFormat(InputStreamReader isr, int[] cardinalities, svm_parameter param) throws MaltChainedException {
676                    final svm_problem prob = new svm_problem();
677                    try {
678                            final BufferedReader fp = new BufferedReader(isr);
679                            int max_index = 0;
680                            if (xlist == null) {
681                                    xlist = new ArrayList<svm_node>(); 
682                            }
683                            prob.l = getNumberOfInstances();
684                            prob.x = new svm_node[prob.l][];
685                            prob.y = new double[prob.l];
686                            int i = 0;
687                            final Pattern tabPattern = Pattern.compile("\t");
688                            final Pattern pipePattern = Pattern.compile("\\|");
689                            while(true) {
690                                    String line = fp.readLine();
691                                    if(line == null) break;
692                                    String[] columns = tabPattern.split(line);
693    
694                                    if (columns.length == 0) {
695                                            continue;
696                                    }
697                                    
698                                    int offset = 0; 
699                                    int j = 0;
700                                    try {
701                                            prob.y[i] = (double)Integer.parseInt(columns[j]);
702                                            int p = 0;
703                                            for(j = 1; j < columns.length; j++) {
704                                                    final String[] items = pipePattern.split(columns[j]);   
705                                                    for (int k = 0; k < items.length; k++) {
706                                                            try {
707                                                                    if (Integer.parseInt(items[k]) != -1) {
708                                                                            xlist.add(p, new svm_node());
709                                                                            xlist.get(p).value = 1;
710                                                                            xlist.get(p).index = Integer.parseInt(items[k])+offset;
711                                                                            p++;
712                                                                    }
713                                                            } catch (NumberFormatException e) {
714                                                                    throw new LibsvmException("The instance file contain a non-integer value '"+items[k]+"'", e);
715                                                            }
716                                                    }
717                                                    offset += cardinalities[j-1];
718                                            }
719                                            prob.x[i] = xlist.subList(0, p).toArray(new svm_node[0]);
720                                            if(columns.length > 1) {
721                                                    max_index = Math.max(max_index, xlist.get(p-1).index);
722                                            }
723                                            i++;
724                                            xlist.clear();
725                                    } catch (ArrayIndexOutOfBoundsException e) {
726                                            throw new LibsvmException("Cannot read from the instance file. ", e);
727                                    }
728                            }
729                            fp.close();     
730                            if (param.gamma == 0) {
731                                    param.gamma = 1.0/max_index;
732                            }
733                            xlist = null;
734                    } catch (IOException e) {
735                            throw new LibsvmException("Cannot read from the instance file. ", e);
736                    }
737                    return prob;
738            }
739            
740            protected void initSpecialParameters() throws MaltChainedException {
741                    if (getConfiguration().getOptionValue("singlemalt", "null_value") != null && getConfiguration().getOptionValue("singlemalt", "null_value").toString().equalsIgnoreCase("none")) {
742                            excludeNullValues = true;
743                    } else {
744                            excludeNullValues = false;
745                    }
746                    saveInstanceFiles = ((Boolean)getConfiguration().getOptionValue("libsvm", "save_instance_files")).booleanValue();
747                            
748                    if (!getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().equals("")) {
749                            try {
750                                    if (!new File(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString()).exists()) {
751                                            throw new LibsvmException("The path to the external LIBSVM trainer 'svm-train' is wrong.");
752                                    }
753                                    if (new File(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString()).isDirectory()) {
754                                            throw new LibsvmException("The option --libsvm-libsvm_external points to a directory, the path should point at the 'svm-train' file or the 'svm-train.exe' file");
755                                    }
756                                    if (!(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().endsWith("svm-train") || getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().endsWith("svm-train.exe"))) {
757                                            throw new LibsvmException("The option --libsvm-libsvm_external does not specify the path to 'svm-train' file or the 'svm-train.exe' file. ");
758                                    }
759                                    pathExternalSVMTrain = getConfiguration().getOptionValue("libsvm", "libsvm_external").toString();
760                            } catch (SecurityException e) {
761                                    throw new LibsvmException("Access denied to the file specified by the option --libsvm-libsvm_external. ", e);
762                            }
763                    }
764                    if (getConfiguration().getOptionValue("libsvm", "verbosity") != null) {
765                            verbosity = Verbostity.valueOf(getConfiguration().getOptionValue("libsvm", "verbosity").toString().toUpperCase());
766                    }
767            }
768            
769            /**
770             * Assign a default value to all svm parameters
771             * 
772             * @param param a svm_parameter object
773             */
774            protected void initParameters(svm_parameter param) throws MaltChainedException {
775                    if (param == null) {
776                            throw new LibsvmException("Svm-parameters cannot be found. ");
777                    }
778                    param.svm_type = svm_parameter.C_SVC;
779                    param.kernel_type = svm_parameter.POLY;
780                    param.degree = 2;
781                    param.gamma = 0.2;      // 1/k
782                    param.coef0 = 0;
783                    param.nu = 0.5;
784                    param.cache_size = 100; 
785                    param.C = 1; 
786                    param.eps = 1.0; 
787                    param.p = 0.1;
788                    param.shrinking = 1;
789                    param.probability = 0;
790                    param.nr_weight = 0;
791                    param.weight_label = new int[0];
792                    param.weight = new double[0];
793            }
794            
795            /**
796             * Returns a string containing all svm-parameters of interest
797             * 
798             * @param param a svm_parameter object
799             * @return a string containing all svm-parameters of interest
800             */
801            public String toStringParameters(svm_parameter param)  {
802                    if (param == null) {
803                            throw new IllegalArgumentException("Svm-parameters cannot be found. ");
804                    }
805                    final StringBuffer sb = new StringBuffer();
806                    
807                    final String[] svmtypes = {"C_SVC", "NU_SVC","ONE_CLASS","EPSILON_SVR","NU_SVR"};
808                    final String[] kerneltypes = {"LINEAR", "POLY","RBF","SIGMOID","PRECOMPUTED"};
809                    final DecimalFormat dform = new DecimalFormat("#0.0#"); 
810                    final DecimalFormatSymbols sym = new DecimalFormatSymbols();
811                    sym.setDecimalSeparator('.');
812                    dform.setDecimalFormatSymbols(sym);
813                    sb.append("LIBSVM SETTINGS\n");
814                    sb.append("  SVM type      : " + svmtypes[param.svm_type] + " (" + param.svm_type + ")\n");
815                    sb.append("  Kernel        : " + kerneltypes[param.kernel_type] + " (" + param.kernel_type + ")\n");
816                    if (param.kernel_type == svm_parameter.POLY) {
817                            sb.append("  Degree        : " + param.degree + "\n");
818                    }
819                    if (param.kernel_type == svm_parameter.POLY || param.kernel_type == svm_parameter.RBF || param.kernel_type == svm_parameter.SIGMOID) {
820                            sb.append("  Gamma         : " + dform.format(param.gamma) + "\n");
821                            if (param.kernel_type == svm_parameter.POLY || param.kernel_type == svm_parameter.SIGMOID) {
822                                    sb.append("  Coef0         : " + dform.format(param.coef0) + "\n");
823                            }
824                    }
825                    if (param.svm_type == svm_parameter.NU_SVC || param.svm_type == svm_parameter.NU_SVR || param.svm_type == svm_parameter.ONE_CLASS) {
826                            sb.append("  Nu            : " + dform.format(param.nu) + "\n");
827                    }
828                    sb.append("  Cache Size    : " + dform.format(param.cache_size) + " MB\n");
829                    if (param.svm_type == svm_parameter.C_SVC || param.svm_type == svm_parameter.NU_SVR || param.svm_type == svm_parameter.EPSILON_SVR) {
830                            sb.append("  C             : " + dform.format(param.C) + "\n");
831                    }
832                    sb.append("  Eps           : " + dform.format(param.eps) + "\n");
833                    if (param.svm_type == svm_parameter.EPSILON_SVR) {
834                            sb.append("  P             : " + dform.format(param.p) + "\n");
835                    }
836                    sb.append("  Shrinking     : " + param.shrinking + "\n");
837                    sb.append("  Probability   : " + param.probability + "\n");
838                    if (param.svm_type == svm_parameter.C_SVC) {
839                            sb.append("  #Weight       : " + param.nr_weight + "\n");
840                            if (param.nr_weight > 0) {
841                                    sb.append("  Weight labels : ");
842                                    for (int i = 0; i < param.nr_weight; i++) {
843                                            sb.append(param.weight_label[i]);
844                                            if (i != param.nr_weight-1) {
845                                                    sb.append(", ");
846                                            }
847                                    }
848                                    sb.append("\n");
849                                    for (int i = 0; i < param.nr_weight; i++) {
850                                            sb.append(dform.format(param.weight));
851                                            if (i != param.nr_weight-1) {
852                                                    sb.append(", ");
853                                            }
854                                    }
855                                    sb.append("\n");
856                            }
857                    }
858                    return sb.toString();
859            }
860            
861            public String[] getSVMParamStringArray(svm_parameter param) {
862                    final ArrayList<String> params = new ArrayList<String>();
863    
864                    if (param.svm_type != 0) {
865                            params.add("-s"); params.add(new Integer(param.svm_type).toString());
866                    }
867                    if (param.kernel_type != 2) {
868                            params.add("-t"); params.add(new Integer(param.kernel_type).toString());
869                    }
870                    if (param.degree != 3) {
871                            params.add("-d"); params.add(new Integer(param.degree).toString());
872                    }
873                    params.add("-g"); params.add(new Double(param.gamma).toString());
874                    if (param.coef0 != 0) {
875                            params.add("-r"); params.add(new Double(param.coef0).toString());
876                    }
877                    if (param.nu != 0.5) {
878                            params.add("-n"); params.add(new Double(param.nu).toString());
879                    }
880                    if (param.cache_size != 100) {
881                            params.add("-m"); params.add(new Double(param.cache_size).toString());
882                    }
883                    if (param.C != 1) {
884                            params.add("-c"); params.add(new Double(param.C).toString());
885                    }
886                    if (param.eps != 0.001) {
887                            params.add("-e"); params.add(new Double(param.eps).toString());
888                    }
889                    if (param.p != 0.1) {
890                            params.add("-p"); params.add(new Double(param.p).toString());
891                    }
892                    if (param.shrinking != 1) {
893                            params.add("-h"); params.add(new Integer(param.shrinking).toString());
894                    }
895                    if (param.probability != 0) {
896                            params.add("-b"); params.add(new Integer(param.probability).toString());
897                    }
898    
899                    return params.toArray(new String[params.size()]);
900            }
901            
902            /**
903             * Parses the parameter string. The parameter string must contain parameter and value pairs, which are separated by a blank 
904             * or a underscore. The parameter begins with a character '-' followed by a one-character flag and the value must comply with
905             * the parameters data type. Some examples:
906             * 
907             * -s 0 -t 1 -d 2 -g 0.4 -e 0.1
908             * -s_0_-t_1_-d_2_-g_0.4_-e_0.1
909             * 
910             * @param paramstring   the parameter string 
911             * @param param a svm_parameter object
912             * @throws LibsvmException
913             */
914            public void parseParameters(String paramstring, svm_parameter param) throws MaltChainedException {
915                    if (param == null) {
916                            throw new LibsvmException("Svm-parameters cannot be found. ");
917                    }
918                    if (paramstring == null) {
919                            return;
920                    }
921                    final String[] argv;
922                    try {
923                            argv = paramstring.split("[_\\p{Blank}]");
924                    } catch (PatternSyntaxException e) {
925                            throw new LibsvmException("Could not split the svm-parameter string '"+paramstring+"'. ", e);
926                    }
927                    for (int i=0; i < argv.length-1; i++) {
928                            if(argv[i].charAt(0) != '-') {
929                                    throw new LibsvmException("The argument flag should start with the following character '-', not with "+argv[i].charAt(0));
930                            }
931                            if(++i>=argv.length) {
932                                    throw new LibsvmException("The last argument does not have any value. ");
933                            }
934                            try {
935                                    switch(argv[i-1].charAt(1)) {
936                                    case 's':
937                                            param.svm_type = Integer.parseInt(argv[i]);
938                                            break;
939                                    case 't':
940                                            param.kernel_type = Integer.parseInt(argv[i]);
941                                            break;
942                                    case 'd':
943                                            param.degree = Integer.parseInt(argv[i]);
944                                            break;
945                                    case 'g':
946                                            param.gamma = Double.valueOf(argv[i]).doubleValue();
947                                            break;
948                                    case 'r':
949                                            param.coef0 = Double.valueOf(argv[i]).doubleValue();
950                                            break;
951                                    case 'n':
952                                            param.nu = Double.valueOf(argv[i]).doubleValue();
953                                            break;
954                                    case 'm':
955                                            param.cache_size = Double.valueOf(argv[i]).doubleValue();
956                                            break;
957                                    case 'c':
958                                            param.C = Double.valueOf(argv[i]).doubleValue();
959                                            break;
960                                    case 'e':
961                                            param.eps = Double.valueOf(argv[i]).doubleValue();
962                                            break;
963                                    case 'p':
964                                            param.p = Double.valueOf(argv[i]).doubleValue();
965                                            break;
966                                    case 'h':
967                                            param.shrinking = Integer.parseInt(argv[i]);
968                                            break;
969                                case 'b':
970                                            param.probability = Integer.parseInt(argv[i]);
971                                            break;
972                                    case 'w':
973                                            ++param.nr_weight;
974                                            {
975                                                    int[] old = param.weight_label;
976                                                    param.weight_label = new int[param.nr_weight];
977                                                    System.arraycopy(old,0,param.weight_label,0,param.nr_weight-1);
978                                            }
979            
980                                            {
981                                                    double[] old = param.weight;
982                                                    param.weight = new double[param.nr_weight];
983                                                    System.arraycopy(old,0,param.weight,0,param.nr_weight-1);
984                                            }
985            
986                                            param.weight_label[param.nr_weight-1] = Integer.parseInt(argv[i].substring(2));
987                                            param.weight[param.nr_weight-1] = Double.valueOf(argv[i]).doubleValue();
988                                            break;
989                                    case 'Y':
990                                    case 'V':
991                                    case 'S':
992                                    case 'F':
993                                    case 'T':
994                                    case 'M':
995                                    case 'N':
996                                            break;
997                                    default:
998                                            throw new LibsvmException("Unknown svm parameter: '"+argv[i-1]+"' with value '"+argv[i]+"'. ");         
999                                    }
1000                            } catch (ArrayIndexOutOfBoundsException e) {
1001                                    throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);
1002                            } catch (NumberFormatException e) {
1003                                    throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);      
1004                            } catch (NullPointerException e) {
1005                                    throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);      
1006                            }
1007                    }
1008            }
1009    
1010            public void svm_predict_with_kbestlist(svm_model model, svm_node[] x, KBestList kBestList) throws MaltChainedException {
1011                    int i;
1012                    final int nr_class = svm.svm_get_nr_class(model);
1013                    final double[] dec_values = new double[nr_class*(nr_class-1)/2];
1014                    svm.svm_predict_values(model, x, dec_values);
1015    
1016                    final int[] vote = new int[nr_class];
1017                    final double[] score = new double[nr_class];
1018                    final int[] voteindex = new int[nr_class];
1019                    for(i=0;i<nr_class;i++) {
1020                            vote[i] = 0;
1021                            score[i] = 0.0;
1022                            voteindex[i] = i;
1023                    }
1024                    int pos=0;
1025                    for(i=0;i<nr_class;i++) {
1026                            for(int j=i+1;j<nr_class;j++) {
1027                                    if(dec_values[pos] > 0) {
1028                                            vote[i]++;
1029                                    } else {
1030                                            vote[j]++;
1031                                    }
1032                                    score[i] += dec_values[pos];
1033                                    score[j] += dec_values[pos];
1034                                    pos++;
1035                            }
1036                    }
1037                    for(i=0;i<nr_class;i++) {
1038                            score[i] = score[i]/nr_class;
1039                    }
1040                    int lagest, tmpint;
1041                    double tmpdouble;
1042                    for (i=0;i<nr_class-1;i++) {
1043                            lagest = i;
1044                            for (int j=i;j<nr_class;j++) {
1045                                    if (vote[j] > vote[lagest]) {
1046                                            lagest = j;
1047                                    }
1048                            }
1049                            tmpint = vote[lagest];
1050                            vote[lagest] = vote[i];
1051                            vote[i] = tmpint;
1052                            tmpdouble = score[lagest];
1053                            score[lagest] = score[i];
1054                            score[i] = tmpdouble;
1055                            tmpint = voteindex[lagest];
1056                            voteindex[lagest] = voteindex[i];
1057                            voteindex[i] = tmpint;
1058                    }
1059                    final int[] labels = new int[nr_class];
1060                    svm.svm_get_labels(model, labels);
1061                    int k = nr_class-1;
1062                    if (kBestList.getK() != -1) {
1063                            k = kBestList.getK() - 1;
1064                    }
1065                    
1066                    for (i=0; i<nr_class && k >= 0; i++, k--) {
1067                            if (vote[i] > 0 || i == 0) {
1068                                    if (kBestList instanceof ScoredKBestList) {
1069                                            ((ScoredKBestList)kBestList).add(labels[voteindex[i]], (float)vote[i]/(float)(nr_class*(nr_class-1)/2));
1070                                    } else {
1071                                            kBestList.add(labels[voteindex[i]]);
1072                                    }
1073                            }
1074                    }
1075            }
1076            /**
1077             * Converts the instance file (Malt's own SVM format) into the LIBSVM (SVMLight) format. The input instance file is removed (replaced)
1078             * by the instance file in the LIBSVM (SVMLight) format. If a column contains -1, the value will be removed in destination file. 
1079             * 
1080             * @param isr the input stream reader for the source instance file
1081             * @param osw   the output stream writer for the destination instance file
1082             * @param cardinalities a vector containing the number of distinct values for a particular column
1083             * @throws LibsvmException
1084             */
1085            public static void maltSVMFormat2OriginalSVMFormat(InputStreamReader isr, OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException {
1086                    try {
1087                            final BufferedReader in = new BufferedReader(isr);
1088                            final BufferedWriter out = new BufferedWriter(osw);
1089    
1090                            int c;
1091                            int j = 0;
1092                            int offset = 0;
1093                            int code = 0;
1094                            while(true) {
1095                                    c = in.read();
1096                                    if (c == -1) {
1097                                            break;
1098                                    }
1099                                    
1100                                    if (c == '\t' || c == '|') {
1101                                            if (j == 0) {
1102                                                    out.write(Integer.toString(code));
1103                                                    j++;
1104                                            } else {
1105                                                    if (code != -1) {
1106                                                            out.write(' ');
1107                                                            out.write(Integer.toString(code+offset));
1108                                                            out.write(":1");
1109                                                    }
1110                                                    if (c == '\t') {
1111                                                            offset += cardinalities[j-1];
1112                                                            j++;
1113                                                    }
1114                                            }
1115                                            code = 0;
1116                                    } else if (c == '\n') {
1117                                            j = 0;
1118                                            offset = 0;
1119                                            out.write('\n');
1120                                            code = 0;
1121                                    } else if (c == '-') {
1122                                            code = -1;
1123                                    } else if (code != -1) {
1124                                            if (c > 47 && c < 58) {
1125                                                    code = code * 10 + (c-48);
1126                                            } else {
1127                                                    throw new LibsvmException("The instance file contain a non-integer value, when converting the Malt SVM format into LIBSVM format.");
1128                                            }
1129                                    }       
1130                            }                       
1131                            in.close();     
1132                            out.close();
1133                    } catch (IOException e) {
1134                            throw new LibsvmException("Cannot read from the instance file, when converting the Malt SVM format into LIBSVM format. ", e);
1135                    }
1136            }
1137            
1138            protected void finalize() throws Throwable {
1139                    try {
1140                            closeInstanceWriter();
1141                    } finally {
1142                            super.finalize();
1143                    }
1144            }
1145            
1146            /* (non-Javadoc)
1147             * @see java.lang.Object#toString()
1148             */
1149            public String toString() {
1150                    final StringBuffer sb = new StringBuffer();
1151                    sb.append("\nLIBSVM INTERFACE\n"); 
1152                    sb.append("  LIBSVM version: "+LIBSVM_VERSION+"\n");
1153                    sb.append("  SVM-param string: "+paramString+"\n");
1154                    
1155                    sb.append(toStringParameters(svmParam));
1156                    return sb.toString();
1157            }
1158    
1159    
1160            @Override
1161            public void divideByFeatureSet(
1162                            Set<Integer> featureIdsToCreateSeparateBranchesForSet, ArrayList<Integer> divideFeatureIndexVector, String otherId)  throws MaltChainedException {
1163    
1164                    
1165                    //Create a hash map that maps every feature id to a writer
1166                    HashMap<Integer, BufferedWriter>   featureIdToWriterMap = new HashMap<Integer, BufferedWriter>();
1167                    
1168                    for(int element:featureIdsToCreateSeparateBranchesForSet){
1169                     
1170    
1171                            BufferedWriter outputWriter = new BufferedWriter(getConfiguration().getConfigurationDir().getOutputStreamWriter(owner.getModelName().replace('.','_') + element + "." + getLearningMethodName()+".ins"));
1172                            featureIdToWriterMap.put(element, outputWriter);
1173                    
1174                    }
1175                    
1176                    BufferedWriter otherOutputWriter = new BufferedWriter(getConfiguration().getConfigurationDir().getOutputStreamWriter(owner.getModelName().replace('.','_') + otherId + "." + getLearningMethodName()+".ins"));
1177    
1178                    
1179                    try {
1180                            final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins"));
1181                            //every line will be written to a separate file
1182                            String line = in.readLine();
1183                            final Pattern tabPattern = Pattern.compile("\t");
1184                            while(line!=null){
1185                                    
1186                                    //Find out which pot the line shall be put in
1187                                    String[] lineArray = tabPattern.split(line);
1188                                    
1189                                    int id = new Integer(lineArray[divideFeatureIndexVector.get(0)+1]);
1190                                    
1191                                    if(!featureIdToWriterMap.containsKey(id)){
1192                                            otherOutputWriter.write(line + "\n");
1193                                    }else    
1194                                            featureIdToWriterMap.get(id).write(getLineToWrite(lineArray,divideFeatureIndexVector.get(0)+1));
1195                                    
1196                                    line = in.readLine();
1197                            }
1198                            
1199                            otherOutputWriter.close();
1200                            
1201                            in.close();
1202                            
1203                            for(BufferedWriter writer: featureIdToWriterMap.values())
1204                                    writer.close();
1205    
1206                    } catch (SecurityException e) {
1207                            throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
1208                    } catch (NullPointerException  e) {
1209                            throw new LiblinearException("The instance file cannot be found. ", e);
1210                    } catch (FileNotFoundException e) {
1211                            throw new LiblinearException("The instance file cannot be found. ", e);
1212                    } catch (IOException e) {
1213                            throw new LiblinearException("The Liblinear learner read from the instance file. ", e);
1214                    }
1215    
1216                    
1217    
1218            }
1219    
1220    
1221            private String getLineToWrite(String[] lineArray, int excludeIndex) {
1222                    StringBuffer buf = new StringBuffer();
1223                    
1224                    for(int n = 0; n < lineArray.length; n++)
1225                            if(n != excludeIndex)
1226                                    buf.append(lineArray[n] + "\t");
1227                    
1228                    
1229                    buf.append("\n");               
1230    
1231                    
1232                    return buf.toString();
1233            }
1234    
1235    
1236            @Override
1237            public Map<Integer, Integer> createFeatureIdToCountMap(
1238                            ArrayList<Integer> divideFeatureIndexVector) throws MaltChainedException{
1239    
1240                    HashMap<Integer, Integer> featureIdToCountMap = new HashMap<Integer, Integer>();
1241                    
1242                    //Go trough the file and count all feature ids in the given column(s)
1243                    
1244                    try {
1245                            final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins"));
1246                            //every line will be written to a separate file
1247                            String line = in.readLine();
1248                            final Pattern tabPattern = Pattern.compile("\t");
1249                            while(line!=null){
1250                                    
1251                                    //Find out which pot the line shall be put in
1252                                    String[] lineArray = tabPattern.split(line);
1253                                    
1254                                    for(int n = 0; n < divideFeatureIndexVector.size(); n++){
1255                                            int id = new Integer(lineArray[divideFeatureIndexVector.get(n)+1]);
1256                                            
1257                                            
1258                                            if (!featureIdToCountMap.containsKey(id)) {
1259    
1260                                                    featureIdToCountMap.put(id, 0);
1261    
1262                                            }
1263    
1264                                            int previousCount = featureIdToCountMap.get(id);
1265                                            
1266                                            featureIdToCountMap.put(id, previousCount + 1);
1267                                            
1268                                    }                               
1269                                    
1270                                    line = in.readLine();
1271                            }
1272                            
1273    
1274                            
1275                            in.close();
1276                            
1277    
1278                    } catch (SecurityException e) {
1279                            throw new LiblinearException("The Libsvm learner cannot remove the instance file. ", e);
1280                    } catch (NullPointerException  e) {
1281                            throw new LiblinearException("The instance file cannot be found. ", e);
1282                    } catch (FileNotFoundException e) {
1283                            throw new LiblinearException("The instance file cannot be found. ", e);
1284                    } catch (IOException e) {
1285                            throw new LiblinearException("The Liblinear learner read from the instance file. ", e);
1286                    }
1287                    
1288                    
1289                    
1290                    return featureIdToCountMap;
1291            }
1292    
1293    
1294    
1295    }