001    package org.maltparser.core.syntaxgraph.feature;
002    
003    import java.util.LinkedHashMap;
004    import java.util.Map;
005    import org.maltparser.core.exception.MaltChainedException;
006    import org.maltparser.core.feature.function.AddressFunction;
007    import org.maltparser.core.feature.function.FeatureFunction;
008    import org.maltparser.core.feature.value.AddressValue;
009    import org.maltparser.core.feature.value.FeatureValue;
010    import org.maltparser.core.feature.value.SingleFeatureValue;
011    import org.maltparser.core.io.dataformat.ColumnDescription;
012    import org.maltparser.core.symbol.SymbolTable;
013    import org.maltparser.core.symbol.SymbolTableHandler;
014    import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId;
015    import org.maltparser.core.syntaxgraph.SyntaxGraphException;
016    import org.maltparser.core.syntaxgraph.node.DependencyNode;
017    
018    public class DistanceFeature implements FeatureFunction {
019            protected AddressFunction addressFunction1;
020            protected AddressFunction addressFunction2;
021            protected SymbolTableHandler tableHandler;
022            protected SymbolTable table;
023            protected SingleFeatureValue featureValue;
024            protected String normalizationString;
025            protected Map<Integer,String> normalization;
026            
027            
028            public DistanceFeature(SymbolTableHandler tableHandler) throws MaltChainedException {
029                    super();
030                    featureValue = new SingleFeatureValue(this);
031                    setTableHandler(tableHandler);
032                    normalization = new LinkedHashMap<Integer,String>();
033            }
034            
035            /**
036             * Initialize the distance feature function
037             * 
038             * @param arguments an array of arguments with the type returned by getParameterTypes()
039             * @throws MaltChainedException
040             */
041            public void initialize(Object[] arguments) throws MaltChainedException {
042                    if (arguments.length != 3) {
043                            throw new SyntaxGraphException("Could not initialize DistanceFeature: number of arguments is not correct. ");
044                    }
045                    // Checks that the two arguments are address functions
046                    if (!(arguments[0] instanceof AddressFunction)) {
047                            throw new SyntaxGraphException("Could not initialize DistanceFeature: the first argument is not an address function. ");
048                    }
049                    if (!(arguments[1] instanceof AddressFunction)) {
050                            throw new SyntaxGraphException("Could not initialize DistanceFeature: the second argument is not an address function. ");
051                    }
052                    if (!(arguments[2] instanceof java.lang.String)) {
053                            throw new SyntaxGraphException("Could not initialize DistanceFeature: the third argument is not a string. ");
054                    }
055                    setAddressFunction1((AddressFunction)arguments[0]);
056                    setAddressFunction2((AddressFunction)arguments[1]);
057                    
058                    // Creates a symbol table called "DISTANCE" using one null value
059                    setSymbolTable(tableHandler.addSymbolTable("DISTANCE", ColumnDescription.INPUT, "one"));
060                    normalizationString = (String)arguments[2];
061                    String[] items  = normalizationString.split("\\|");
062                    
063                    if (items.length <= 0 || !items[0].equals("0")) {
064                            throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a list of integer values separated with | and the first element must be 0.");
065                    }
066                    int tmp = -1;
067                    for (int i = 0; i < items.length; i++) {
068                            int v;
069                            try {
070                                    v = Integer.parseInt(items[i]);
071                            } catch (NumberFormatException e) {
072                                    throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |", e);
073                            }
074                            normalization.put(v, ">="+v);
075                            table.addSymbol(">="+v);
076                            if (tmp != -1 && tmp >= v) {
077                                    throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |");
078                            }
079                            tmp = v;
080                    }
081            }
082            
083            /**
084             * Returns an array of class types used by the feature extraction system to invoke initialize with
085             * correct arguments.
086             * 
087             * @return an array of class types
088             */
089            public Class<?>[] getParameterTypes() {
090                    Class<?>[] paramTypes = { org.maltparser.core.feature.function.AddressFunction.class, 
091                                                                      org.maltparser.core.feature.function.AddressFunction.class,
092                                                                      java.lang.String.class};
093                    return paramTypes; 
094            }
095            
096            /**
097             * Returns the string representation of the integer <code>code</code> according to the distance feature function. 
098             * 
099             * @param code the integer representation of the symbol
100             * @return the string representation of the integer <code>code</code> according to the distance feature function.
101             * @throws MaltChainedException
102             */
103            public String getSymbol(int code) throws MaltChainedException {
104                    return table.getSymbolCodeToString(code);
105            }
106            
107            /**
108             * Returns the integer representation of the string <code>symbol</code> according to the distance feature function.
109             * 
110             * @param symbol the string representation of the symbol
111             * @return the integer representation of the string <code>symbol</code> according to the distance feature function.
112             * @throws MaltChainedException
113             */
114            public int getCode(String symbol) throws MaltChainedException {
115                    return table.getSymbolStringToCode(symbol);
116            }
117            
118            /**
119             * Cause the distance feature function to update the cardinality of the feature value.
120             * 
121             * @throws MaltChainedException
122             */
123            public void updateCardinality() {
124                    featureValue.setCardinality(table.getValueCounter()); 
125            }
126            
127            /**
128             * Cause the feature function to update the feature value.
129             * 
130             * @throws MaltChainedException
131             */
132            public void update() throws MaltChainedException {
133                    // Retrieve the address value 
134                    final AddressValue arg1 = addressFunction1.getAddressValue();
135                    final AddressValue arg2 = addressFunction2.getAddressValue();
136                    
137                    // if arg1 or arg2 is null, then set a NO_NODE null value as feature value
138                    if (arg1.getAddress() == null || arg2.getAddress() == null) { 
139                            featureValue.setCode(table.getNullValueCode(NullValueId.NO_NODE));
140                            featureValue.setSymbol(table.getNullValueSymbol(NullValueId.NO_NODE));
141                            featureValue.setKnown(true);
142                            featureValue.setNullValue(true);                        
143                    } else {
144                            // Unfortunately this method takes a lot of time  arg1.getAddressClass().asSubclass(org.maltparser.core.syntaxgraph.node.DependencyNode.class);
145                            // Cast the address arguments to dependency nodes
146                            final DependencyNode node1 = (DependencyNode)arg1.getAddress();
147                            final DependencyNode node2 = (DependencyNode)arg2.getAddress();
148                            
149                            if (!node1.isRoot() && !node2.isRoot()) { 
150                                    // Calculates the distance
151                                    final int index1 = node1.getIndex();
152                                    final int index2 = node2.getIndex();
153                                    final int distance = Math.abs(index1-index2);
154                                    
155                                    
156                                    int lower = -1;
157                                    boolean f = false;
158                                    for (Integer upper : normalization.keySet()) {
159                                            if (distance >= lower && distance < upper) {
160                                                    featureValue.setCode(table.getSymbolStringToCode(normalization.get(lower)));
161                                                    featureValue.setSymbol(normalization.get(lower));
162                                                    f = true;
163                                                    break;
164                                            }
165                                            lower = upper;
166                                    }
167                                    if (f == false) {
168                                            featureValue.setCode(table.getSymbolStringToCode(normalization.get(lower)));
169                                            featureValue.setSymbol(normalization.get(lower));
170                                    }
171                                    
172                                    // Tells the feature value that the feature is known and is not a null value
173                                    featureValue.setKnown(true);
174                                    featureValue.setNullValue(false);
175    
176                            } else { 
177                                    // if node1 or node2 is a root node, set a ROOT_NODE null value as feature value
178                                    featureValue.setCode(table.getNullValueCode(NullValueId.ROOT_NODE));
179                                    featureValue.setSymbol(table.getNullValueSymbol(NullValueId.ROOT_NODE));
180                                    featureValue.setKnown(true);
181                                    featureValue.setNullValue(true);
182                            }
183                    }
184            }
185            
186            /**
187             * Returns the feature value
188             * 
189             * @return the feature value
190             */
191            public FeatureValue getFeatureValue() {
192                    return featureValue;
193            }
194            
195            /**
196             * Returns the symbol table used by the distance feature function
197             * 
198             * @return the symbol table used by the distance feature function
199             */
200            public SymbolTable getSymbolTable() {
201                    return table;
202            }
203            
204            /**
205             * Returns the address function 1 (argument 1) 
206             * 
207             * @return the address function 1 (argument 1) 
208             */
209            public AddressFunction getAddressFunction1() {
210                    return addressFunction1;
211            }
212    
213    
214            /**
215             * Sets the address function 1 (argument 1) 
216             * 
217             * @param addressFunction1 a address function 1 (argument 1) 
218             */
219            public void setAddressFunction1(AddressFunction addressFunction1) {
220                    this.addressFunction1 = addressFunction1;
221            }
222            
223            /**
224             * Returns the address function 2 (argument 2) 
225             * 
226             * @return the address function 1 (argument 2) 
227             */
228            public AddressFunction getAddressFunction2() {
229                    return addressFunction2;
230            }
231    
232            /**
233             * Sets the address function 2 (argument 2) 
234             * 
235             * @param addressFunction2 a address function 2 (argument 2) 
236             */
237            public void setAddressFunction2(AddressFunction addressFunction2) {
238                    this.addressFunction2 = addressFunction2;
239            }
240            
241            /**
242             * Returns symbol table handler
243             * 
244             * @return a symbol table handler
245             */
246            public SymbolTableHandler getTableHandler() {
247                    return tableHandler;
248            }
249    
250            /**
251             * Sets the symbol table handler
252             * 
253             * @param tableHandler a symbol table handler
254             */
255            public void setTableHandler(SymbolTableHandler tableHandler) {
256                    this.tableHandler = tableHandler;
257            }
258    
259            /**
260             * Sets the symbol table used by the distance feature function
261             * 
262             * @param table
263             */
264            public void setSymbolTable(SymbolTable table) {
265                    this.table = table;
266            }
267            
268            public boolean equals(Object obj) {
269                    if (this == obj)
270                            return true;
271                    if (obj == null)
272                            return false;
273                    if (getClass() != obj.getClass())
274                            return false;
275                    return obj.toString().equals(this.toString());
276            }
277            
278            public int hashCode() {
279                    return 217 + (null == toString() ? 0 : toString().hashCode());
280            }
281            
282            public String toString() {
283                    final StringBuilder sb = new StringBuilder();
284                    sb.append("Distance(");
285                    sb.append(addressFunction1.toString());
286                    sb.append(", ");
287                    sb.append(addressFunction2.toString());
288                    sb.append(", ");
289                    sb.append(normalizationString);
290                    sb.append(')');
291                    return sb.toString();
292            }
293    }
294