/* Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #define C_LUCY_ANALYZER #define C_LUCY_TOKEN #include "Lucy/Util/ToolSet.h" #include "Lucy/Analysis/Analyzer.h" #include "Lucy/Analysis/Token.h" #include "Lucy/Analysis/Inversion.h" Analyzer* Analyzer_init(Analyzer *self) { ABSTRACT_CLASS_CHECK(self, ANALYZER); return self; } Inversion* Analyzer_transform_text(Analyzer *self, CharBuf *text) { size_t token_len = CB_Get_Size(text); Token *seed = Token_new((char*)CB_Get_Ptr8(text), token_len, 0, token_len, 1.0, 1); Inversion *starter = Inversion_new(seed); Inversion *retval = Analyzer_Transform(self, starter); DECREF(seed); DECREF(starter); return retval; } VArray* Analyzer_split(Analyzer *self, CharBuf *text) { Inversion *inversion = Analyzer_Transform_Text(self, text); VArray *out = VA_new(0); Token *token; while ((token = Inversion_Next(inversion)) != NULL) { VA_Push(out, (Obj*)CB_new_from_trusted_utf8(token->text, token->len)); } DECREF(inversion); return out; }