001    package org.maltparser.core.symbol.trie;
002    
003    import java.io.BufferedReader;
004    import java.io.BufferedWriter;
005    import java.io.FileInputStream;
006    import java.io.FileNotFoundException;
007    import java.io.FileOutputStream;
008    import java.io.UnsupportedEncodingException;
009    
010    import java.io.IOException;
011    import java.io.InputStreamReader;
012    import java.io.OutputStreamWriter;
013    import java.util.HashMap;
014    import java.util.Set;
015    import java.util.regex.Pattern;
016    import java.util.regex.PatternSyntaxException;
017    
018    import org.apache.log4j.Logger;
019    
020    import org.maltparser.core.exception.MaltChainedException;
021    import org.maltparser.core.symbol.SymbolException;
022    import org.maltparser.core.symbol.SymbolTable;
023    import org.maltparser.core.symbol.SymbolTableHandler;
024    
025    
026    /**
027    
028    @author Johan Hall
029    @since 1.0
030    */
031    public class TrieSymbolTableHandler implements SymbolTableHandler {
032            private Trie trie;
033            private HashMap<String, TrieSymbolTable> symbolTables;
034            
035            public TrieSymbolTableHandler() {
036                    trie = new Trie();
037                    symbolTables = new HashMap<String, TrieSymbolTable>();
038            }
039    
040            public TrieSymbolTable addSymbolTable(String tableName) throws MaltChainedException {
041                    TrieSymbolTable symbolTable = symbolTables.get(tableName);
042                    if (symbolTable == null) {
043                            symbolTable = new TrieSymbolTable(tableName, trie);
044                            symbolTables.put(tableName, symbolTable);
045                    }
046                    return symbolTable;
047            }
048            
049            public TrieSymbolTable addSymbolTable(String tableName, SymbolTable parentTable) throws MaltChainedException {
050                    TrieSymbolTable symbolTable = symbolTables.get(tableName);
051                    if (symbolTable == null) {
052                            TrieSymbolTable trieParentTable = (TrieSymbolTable)parentTable;
053                            symbolTable = new TrieSymbolTable(tableName, trie, trieParentTable.getColumnCategory(), trieParentTable.getNullValueStrategy());
054                            symbolTables.put(tableName, symbolTable);
055                    }
056                    return symbolTable;
057            }
058            
059            public TrieSymbolTable addSymbolTable(String tableName, int columnCategory, String nullValueStrategy) throws MaltChainedException {
060                    TrieSymbolTable symbolTable = symbolTables.get(tableName);
061                    if (symbolTable == null) {
062                            symbolTable = new TrieSymbolTable(tableName, trie, columnCategory, nullValueStrategy);
063                            symbolTables.put(tableName, symbolTable);
064                    }
065                    return symbolTable;
066            }
067            
068            public TrieSymbolTable addSymbolTable(String tableName, int columnCategory, String nullValueStrategy, String rootLabel) throws MaltChainedException {
069                    TrieSymbolTable symbolTable = symbolTables.get(tableName);
070                    if (symbolTable == null) {
071                            symbolTable = new TrieSymbolTable(tableName, trie, columnCategory, nullValueStrategy, rootLabel);
072                            symbolTables.put(tableName, symbolTable);
073                    }
074                    return symbolTable;
075            }
076            
077            public TrieSymbolTable getSymbolTable(String tableName) {
078                    return symbolTables.get(tableName);
079            }
080            
081            public Set<String> getSymbolTableNames() {
082                    return symbolTables.keySet();
083            }
084            
085            public void save(OutputStreamWriter osw) throws MaltChainedException  {
086                    try {
087                            BufferedWriter bout = new BufferedWriter(osw);
088                            for (TrieSymbolTable table : symbolTables.values()) {
089                                    table.saveHeader(bout);
090                            }
091                            bout.write('\n');
092                            for (TrieSymbolTable table : symbolTables.values()) {
093                                    table.save(bout);
094                            }
095                            bout.close();
096                    } catch (IOException e) {
097                            throw new SymbolException("Could not save the symbol tables. ", e);
098                    }               
099            }
100            
101            public void save(String fileName, String charSet) throws MaltChainedException  {
102                    try {
103                            save(new OutputStreamWriter(new FileOutputStream(fileName), charSet));
104                    } catch (FileNotFoundException e) {
105                            throw new SymbolException("The symbol table file '"+fileName+"' cannot be created. ", e);
106                    } catch (UnsupportedEncodingException e) {
107                            throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
108                    }
109            }
110            
111            public void loadHeader(BufferedReader bin) throws MaltChainedException {
112                    String fileLine = "";
113                    Pattern tabPattern = Pattern.compile("\t");
114                    try {
115                            while ((fileLine = bin.readLine()) != null) {
116                                    if (fileLine.length() == 0 || fileLine.charAt(0) != '\t') {
117                                            break;
118                                    }
119                                    String items[];
120                                    try {
121                                            items = tabPattern.split(fileLine.substring(1));
122                                    } catch (PatternSyntaxException e) {
123                                            throw new SymbolException("The header line of the symbol table  '"+fileLine.substring(1)+"' could not split into atomic parts. ", e);
124                                    }
125                                    if (items.length != 4) {
126                                            throw new SymbolException("The header line of the symbol table  '"+fileLine.substring(1)+"' must contain four columns. ");
127                                    }
128                                    if (items[3].equals("#DUMMY#")) {
129                                            addSymbolTable(items[0], Integer.parseInt(items[1]), items[2]);
130                                    } else {
131                                            addSymbolTable(items[0], Integer.parseInt(items[1]), items[2], items[3]);
132                                    }
133                            }
134                    } catch (NumberFormatException e) {
135                            throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the header. ", e);
136                    } catch (IOException e) {
137                            throw new SymbolException("Could not load the symbol table. ", e);
138                    }
139            }
140            
141            
142            public void load(InputStreamReader isr) throws MaltChainedException  {
143                    try {
144                            BufferedReader bin = new BufferedReader(isr);
145                            String fileLine;
146                            SymbolTable table = null;
147                            bin.mark(2);
148                            if (bin.read() == '\t') {
149                                    bin.reset();
150                                    loadHeader(bin);
151                            } else {
152                                    bin.reset();
153                            }
154                            while ((fileLine = bin.readLine()) != null) {
155                                    if (fileLine.length() > 0) {
156                                            table = addSymbolTable(fileLine);
157                                            table.load(bin);
158                                    }
159                            }
160                            bin.close();
161                    } catch (IOException e) {
162                            throw new SymbolException("Could not load the symbol tables. ", e);
163                    }                       
164            }
165            
166            public void load(String fileName, String charSet) throws MaltChainedException  {
167                    try {
168                            load(new InputStreamReader(new FileInputStream(fileName), charSet));
169    
170                    } catch (FileNotFoundException e) {
171                            throw new SymbolException("The symbol table file '"+fileName+"' cannot be found. ", e);
172                    } catch (UnsupportedEncodingException e) {
173                            throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
174                    }               
175            }
176            
177            
178            public SymbolTable loadTagset(String fileName, String tableName, String charSet, int columnCategory, String nullValueStrategy) throws MaltChainedException {
179                    try {
180                            BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), charSet));
181                            String fileLine;
182                            TrieSymbolTable table = addSymbolTable(tableName, columnCategory, nullValueStrategy);
183    
184                            while ((fileLine = br.readLine()) != null) {
185                                    table.addSymbol(fileLine.trim());
186                            }
187                            return table;
188                    } catch (FileNotFoundException e) {
189                            throw new SymbolException("The tagset file '"+fileName+"' cannot be found. ", e);
190                    } catch (UnsupportedEncodingException e) {
191                            throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
192                    } catch (IOException e) {
193                            throw new SymbolException("The tagset file '"+fileName+"' cannot be loaded. ", e);
194                    }
195            }
196            
197            public SymbolTable loadTagset(String fileName, String tableName, String charSet, int columnCategory, String nullValueStrategy, String rootLabel) throws MaltChainedException {
198                    try {
199                            BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), charSet));
200                            String fileLine;
201                            TrieSymbolTable table = addSymbolTable(tableName, columnCategory, nullValueStrategy, rootLabel);
202    
203                            while ((fileLine = br.readLine()) != null) {
204                                    table.addSymbol(fileLine.trim());
205                            }
206                            return table;
207                    } catch (FileNotFoundException e) {
208                            throw new SymbolException("The tagset file '"+fileName+"' cannot be found. ", e);
209                    } catch (UnsupportedEncodingException e) {
210                            throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
211                    } catch (IOException e) {
212                            throw new SymbolException("The tagset file '"+fileName+"' cannot be loaded. ", e);
213                    }
214            }
215            
216            public void printSymbolTables(Logger logger) throws MaltChainedException  {
217                    for (TrieSymbolTable table : symbolTables.values()) {
218                            table.printSymbolTable(logger);
219                    }       
220            }
221    }