001    package org.maltparser.core.helper;
002    
003    
004    import java.io.BufferedInputStream;
005    import java.io.BufferedOutputStream;
006    import java.io.File;
007    import java.io.FileInputStream;
008    import java.io.FileNotFoundException;
009    import java.io.FileOutputStream;
010    import java.io.IOException;
011    import java.io.InputStream;
012    import java.net.MalformedURLException;
013    import java.net.URL;
014    
015    import org.apache.log4j.Logger;
016    import org.maltparser.core.config.ConfigurationException;
017    import org.maltparser.core.exception.MaltChainedException;
018    import org.maltparser.core.plugin.Plugin;
019    import org.maltparser.core.plugin.PluginLoader;
020    
021    /**
022    *
023    *
024    * @author Johan Hall
025    */
026    public class Util {
027              private static final int BUFFER = 4096;
028              private static final char AMP_CHAR = '&';
029              private static final char LT_CHAR = '<';
030              private static final char GT_CHAR = '>';
031              private static final char QUOT_CHAR = '"';
032              private static final char APOS_CHAR = '\'';
033            
034              public static String xmlEscape(String str) {
035                      boolean needEscape = false;
036                      char c;
037                      for (int i = 0; i < str.length(); i++) {
038                              c = str.charAt(i);
039                              if (c == AMP_CHAR || c == LT_CHAR || c == GT_CHAR || c == QUOT_CHAR || c == APOS_CHAR) {
040                                      needEscape = true;
041                                      break;
042                              }
043                      }
044                      if (!needEscape) {
045                              return str;
046                      }
047                      final StringBuilder sb = new StringBuilder();
048                      for (int i = 0; i < str.length(); i++) {
049                              c = str.charAt(i);
050                              if (str.charAt(i) == AMP_CHAR) {
051                                      sb.append("&amp;");
052                              } else if ( str.charAt(i) == LT_CHAR) {
053                                      sb.append("&lt;");
054                              } else if (str.charAt(i) == GT_CHAR) {
055                                      sb.append("&gt;");
056                              } else if (str.charAt(i) == QUOT_CHAR) {
057                                      sb.append("&quot;");
058                              } else if (str.charAt(i) == APOS_CHAR) {
059                                      sb.append("&apos;");
060                              } else {
061                                      sb.append(c);
062                              }
063                      }
064                      return sb.toString();
065              }
066    
067            /**
068             * Search for a file according the following priority:
069             * <ol>
070             * <li>The local file system
071             * <li>Specified as an URL (starting with http:, file:, ftp: or jar:
072             * <li>MaltParser distribution file (malt.jar)
073             * <li>MaltParser plugins
074             * </ol>
075             * 
076             * If the file string is found, an URL object is returned, otherwise <b>null</b>
077             * 
078             * @param fileString    the file string to convert into an URL.
079             * @return an URL object, if the file string is found, otherwise <b>null</b>
080             * @throws MaltChainedException
081             */
082            public static URL findURL(String fileString) throws MaltChainedException {
083                    File specFile = new File(fileString);
084    
085                    try {
086                            if (specFile.exists()) {
087                                    // found the file in the file system
088                                    return new URL("file:///"+specFile.getAbsolutePath());
089                            } else if (fileString.startsWith("http:") || fileString.startsWith("file:") || fileString.startsWith("ftp:") || fileString.startsWith("jar:")) {
090                                    // the input string is an URL string starting with http, file, ftp or jar
091                                    return new URL(fileString);
092                            } else {
093                                    return findURLinJars(fileString);
094                            } 
095                    } catch (MalformedURLException e) {
096                            throw new MaltChainedException("Malformed URL: "+fileString, e);
097                    }
098            }
099            
100            public static URL findURLinJars(String fileString) throws MaltChainedException {
101                    try {
102                            // search in malt.jar and its plugins
103                            if (Thread.currentThread().getClass().getResource(fileString) != null) {
104                                    // found the input string in the malt.jar file
105                                    return Thread.currentThread().getClass().getResource(fileString);
106                            } else { 
107                                     for (Plugin plugin : PluginLoader.instance()) {
108                                            URL url = null;
109                                            if (!fileString.startsWith("/")) {
110                                                    url = new URL("jar:"+plugin.getUrl() + "!/" + fileString);
111                                            } else {
112                                                    url = new URL("jar:"+plugin.getUrl() + "!" + fileString);
113                                            }
114                                            
115                                            try { 
116                                                    InputStream is = url.openStream();
117                                                    is.close();
118                                            } catch (IOException e) {
119                                                    continue;
120                                            }
121                                            // found the input string in one of the plugins
122                                            return url;
123                                    } 
124                                    // could not convert the input string into an URL
125                                    return null; 
126                            }
127                    } catch (MalformedURLException e) {
128                            throw new MaltChainedException("Malformed URL: "+fileString, e);
129                    }
130            }
131            
132            public static int simpleTicer(Logger logger, long startTime, int nTicxRow, int inTic, int subject) {
133                    logger.info(".");
134                    int tic = inTic + 1;
135                    if (tic >= nTicxRow) {
136                            ticInfo(logger, startTime, subject);
137                            tic = 0;
138                    }
139                    return tic;
140            }
141            
142            public static void startTicer(Logger logger, long startTime, int nTicxRow, int subject) {
143                    logger.info(".");
144                    for (int i = 1; i <= nTicxRow; i++) {
145                            logger.info(" ");
146                    }
147                    ticInfo(logger, startTime, subject);
148            }
149            
150            public static void endTicer(Logger logger, long startTime, int nTicxRow, int inTic, int subject) {
151                    for (int i = inTic; i <= nTicxRow; i++) {
152                            logger.info(" ");
153                    }
154                    ticInfo(logger, startTime, subject);
155            }
156            
157            private static void ticInfo(Logger logger, long startTime, int subject) {
158                    logger.info("\t");
159                    int a = 1000000;
160                    if (subject != 0) {
161                            while (subject/a == 0) {
162                                    logger.info(" ");
163                                    a /= 10;
164                            }
165                    } else {
166                            logger.info("      ");
167                    }
168                    logger.info(subject);
169                    logger.info("\t");
170                    long time = (System.currentTimeMillis()-startTime)/1000;
171                    a = 1000000;
172                    if (time != 0) {
173                            while (time/a == 0 ) {
174                                    logger.info(" ");
175                                    a /= 10;
176                            }
177                            logger.info(time);
178                            logger.info("s");
179                    } else {
180                            logger.info("      0s");
181                    }
182                    logger.info("\t");
183                    long memory =  (Runtime.getRuntime().totalMemory() -  Runtime.getRuntime().freeMemory())/1000000;
184                    a = 1000000;
185                    if (memory != 0) {
186                            while (memory/a == 0 ) {
187                                    logger.info(" ");
188                                    a /= 10;
189                            }
190                            logger.info(memory);
191                            logger.info("MB\n");
192                    } else {
193                            logger.info("      0MB\n");
194                    }
195            }
196            
197            public static void copyfile(String source, String destination) throws MaltChainedException {
198            try {
199                    byte[] readBuffer = new byte[BUFFER];
200                    BufferedInputStream bis = new BufferedInputStream(new FileInputStream(source));
201                    BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER);
202                    int n = 0;
203                        while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) {
204                            bos.write(readBuffer, 0, n);
205                        }
206                    bos.flush();
207                    bos.close();
208                    bis.close();
209                    } catch (FileNotFoundException e) {
210                            throw new MaltChainedException("The destination file '"+destination+"' cannot be created when coping the file. ", e);
211                    } catch (IOException e) {
212                            throw new MaltChainedException("The source file '"+source+"' cannot be copied to destination '"+destination+"'. ", e);
213                    }
214            }
215    
216    }