001    package org.maltparser.core.syntaxgraph.writer;
002    
003    import java.io.BufferedWriter;
004    import java.io.FileNotFoundException;
005    import java.io.FileOutputStream;
006    import java.io.IOException;
007    import java.io.OutputStream;
008    import java.io.OutputStreamWriter;
009    import java.io.UnsupportedEncodingException;
010    import java.util.Iterator;
011    
012    import org.maltparser.core.exception.MaltChainedException;
013    import org.maltparser.core.io.dataformat.ColumnDescription;
014    import org.maltparser.core.io.dataformat.DataFormatException;
015    import org.maltparser.core.io.dataformat.DataFormatInstance;
016    import org.maltparser.core.syntaxgraph.DependencyStructure;
017    import org.maltparser.core.syntaxgraph.TokenStructure;
018    import org.maltparser.core.syntaxgraph.node.TokenNode;
019    /**
020    *
021    *
022    * @author Johan Hall
023    */
024    public class TabWriter implements SyntaxGraphWriter {
025            private BufferedWriter writer;
026            private DataFormatInstance dataFormatInstance;
027            private final StringBuilder output;
028            private boolean closeStream = true;
029    //      private String ID = "ID";
030    //      private String IGNORE_COLUMN_SIGN = "_";
031            private final char TAB = '\t';
032            private final char NEWLINE = '\n';
033    
034            
035            public TabWriter() { 
036                    output = new StringBuilder();
037            }
038            
039            public void open(String fileName, String charsetName) throws MaltChainedException {
040                    try {
041                            open(new OutputStreamWriter(new FileOutputStream(fileName),charsetName));
042                    } catch (FileNotFoundException e) {
043                            throw new DataFormatException("The output file '"+fileName+"' cannot be found.", e);
044                    } catch (UnsupportedEncodingException e) {
045                            throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
046                    }       
047            }
048            
049            public void open(OutputStream os, String charsetName) throws MaltChainedException {
050                    try {
051                            if (os == System.out || os == System.err) {
052                                    closeStream = false;
053                            }
054                            open(new OutputStreamWriter(os, charsetName));
055                    } catch (UnsupportedEncodingException e) {
056                            throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
057                    }
058            }
059            
060            private void open(OutputStreamWriter osw) throws MaltChainedException {
061                    setWriter(new BufferedWriter(osw));
062            }
063            
064            public void writeProlog() throws MaltChainedException {
065                    
066            }
067            
068            public void writeSentence(TokenStructure syntaxGraph) throws MaltChainedException {
069                    if (syntaxGraph == null || dataFormatInstance == null || !syntaxGraph.hasTokens()) {
070                            return;
071                    }
072                    Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
073                    
074                    for (int i : syntaxGraph.getTokenIndices()) {
075                            try {
076                                    ColumnDescription column = null;
077                                    while (columns.hasNext()) {
078                                            column = columns.next();
079    
080                                            if (column.getCategory() == ColumnDescription.INPUT && column.getType() != ColumnDescription.IGNORE) {
081                                                    TokenNode node = syntaxGraph.getTokenNode(i); 
082                                                    if (!column.getName().equals("ID")) {
083                                                            if (node.hasLabel(column.getSymbolTable())) {
084                                                                    output.append(node.getLabelSymbol(column.getSymbolTable()));
085                                                                    if (output.length() != 0) {
086                                                                            writer.write(output.toString());
087                                                                    } else {
088                                                                            writer.write('_');
089                                                                    }
090                                                            } else {
091                                                                    writer.write('_');
092                                                            }
093                                                    } else {
094                                                            writer.write(Integer.toString(i));
095                                                    }
096                                            } else if (column.getCategory() == ColumnDescription.HEAD && column.getType() != ColumnDescription.IGNORE && syntaxGraph instanceof DependencyStructure) {
097                                                    if (((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHead()) {
098                                                            writer.write(Integer.toString(((DependencyStructure)syntaxGraph).getDependencyNode(i).getHead().getIndex()));
099                                                    } else {
100                                                            writer.write(Integer.toString(0));
101                                                    }
102                                                    
103                                            } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && column.getType() != ColumnDescription.IGNORE && syntaxGraph instanceof DependencyStructure) {
104                                                    if (((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHead() && ((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHeadEdgeLabel(column.getSymbolTable())) {
105                                                            output.append(((DependencyStructure)syntaxGraph).getDependencyNode(i).getHeadEdgeLabelSymbol(column.getSymbolTable()));
106                                                    } else {
107                                                            output.append(((DependencyStructure)syntaxGraph).getDefaultRootEdgeLabelSymbol(column.getSymbolTable()));
108                                                    }
109                                                    
110                                                    if (output.length() != 0) {
111                                                            writer.write(output.toString());
112                                                    }
113                                            } else {
114                                                    writer.write(column.getDefaultOutput());
115                                            }
116                                            if (columns.hasNext()) {
117                                                    writer.write(TAB);
118                                            }
119                                            output.setLength(0);
120                                    }
121                                    writer.write(NEWLINE);
122                                    columns = dataFormatInstance.iterator();
123                            } catch (IOException e) {
124                                    close();
125                                    throw new DataFormatException("Could not write to the output file. ", e);
126                            }
127                    }
128                    
129                    try {
130                            writer.write('\n');
131                            writer.flush();
132                    } catch (IOException e) {
133                            close();
134                            throw new DataFormatException("Could not write to the output file. ", e);
135                    }
136            }
137            
138            public void writeEpilog() throws MaltChainedException  {
139                    
140            }
141            
142            public BufferedWriter getWriter() {
143                    return writer;
144            }
145    
146            public void setWriter(BufferedWriter writer) throws MaltChainedException  {
147                    close();
148                    this.writer = writer;
149            }
150            
151            public DataFormatInstance getDataFormatInstance() {
152                    return dataFormatInstance;
153            }
154    
155            public void setDataFormatInstance(DataFormatInstance dataFormatInstance) {
156                    this.dataFormatInstance = dataFormatInstance;
157            }
158    
159            public String getOptions() {
160                    return null;
161            }
162            
163            public void setOptions(String optionString) throws MaltChainedException {
164                    
165            }
166            
167            public void close() throws MaltChainedException {
168                    try {
169                            if (writer != null) {
170                                    writer.flush();
171                                    if (closeStream) {
172                                            writer.close();
173                                    }
174                                    writer = null;
175                            }
176                    }   catch (IOException e) {
177                            throw new DataFormatException("Could not close the output file. ", e);
178                    } 
179    
180            }
181    }