/*
 * WuManber.xs
 * Copyright (c) 2007-2010, Juergen Weigert, Novell Inc.
 * This module is free software. It may be used, redistributed
 * and/or modified under the same terms as Perl itself.
 *
 * see perldoc perlxstut
 * see Rolf Stiebe; Textalgoritmen WS 2005/6
 * see TR94-17_WuManber.pdf
 */

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

#include "wumanber_impl.h"

/*
 * Match callback installed by find_all() as wm->cb.
 *
 * idx    - pattern index as reported by the C side (1-based, see below).
 * offset - offset value reported by the C side for this match.
 * data   - the result AV that find_all() stored in wm->cb_data.
 *
 * Appends one array reference [offset, idx-1] to the result array.
 */
static void
push_result(unsigned int idx, unsigned long offset, void *data)
{
    AV *r = (AV *)data;

    // In perl, indices run from 0..n_pat-1
    // In C, indices run from 1..n_pat
#if 1
    // newRV() takes its own reference on loc; the mortal reference is
    // released later, leaving the pair owned solely by the RV stored in r.
    AV *loc = (AV *)sv_2mortal((SV *)newAV());
    av_push(loc, newSVnv(offset));
    av_push(loc, newSVnv(idx-1));
    av_push(r, newRV((SV *)loc));
#else
    // Alternative flat layout: offset, index, offset, index, ...
    av_push(r, newSVnv(offset));
    av_push(r, newSVnv(idx-1));
#endif
}

MODULE = Search::WuManber    PACKAGE = Search::WuManber

PROTOTYPES: ENABLE

#define BLOCK_SIZE 3
#define HASH1_SIZE 0x10

 # init_tables(obj)
 #
 # Reads obj->{patterns} (must be an ARRAY reference of strings) and
 # obj->{case_sensitive}, hands the patterns to prep_pat(), and stores
 # the resulting struct WuManber (as a byte string) in obj->{wm} and the
 # chosen block size (1, 2 or 3) in obj->{BLOCK_SIZE}.
 # Croaks on missing or malformed input; returns 1 on success.
int
init_tables(obj)
    HV *obj
  PREINIT:
    AV *p;
    SV **pp;
    SV **svp;
    int i, n_patterns;
    unsigned char **pattern_list;
    unsigned int case_sensitive;
    struct WuManber *wm;
  INIT:
    // init PAT table
    pp = hv_fetch(obj, "patterns", 8, 0);
    if (!pp)
      croak("init_tables: no patterns in obj\n");
    // SvRV() is only meaningful on a reference, so test SvROK() first;
    // otherwise a plain scalar here would be dereferenced blindly.
    if (!SvROK(*pp) || SvTYPE(SvRV(*pp)) != SVt_PVAV)
      croak("init_tables: patterns not an ARRAY-ref\n");
    p = (AV *)SvRV(*pp);
    n_patterns = av_len(p);    // highest index, i.e. count-1 (-1 if empty)
    // Fetch all hash inputs before allocating, so these croaks cannot leak.
    svp = hv_fetch(obj, "case_sensitive", 14, 0);
    if (!svp)
      croak("init_tables: no 'case_sensitive' in obj\n");
    case_sensitive = SvUV(*svp);
    // Slots used: [0] unused, [1..count] patterns, [count+1] NULL sentinel.
    // With count == n_patterns+1 that is n_patterns+3 entries.
    pattern_list = (unsigned char **)calloc(n_patterns+3, sizeof(unsigned char *));
    if (!pattern_list)
      croak("init_tables: out of memory\n");
  CODE:
    // The C side uses 1-based pattern indices; perl element i-1 goes to slot i.
    for (i = 1; i <= n_patterns+1; i++)
      {
        SV **ep = av_fetch(p, i-1, 0);
        STRLEN slen;
        // av_fetch() yields NULL for a nonexistent element.
        // The SvPOK test is not strictly needed: perl converts almost
        // anything to string.
        if (!ep || !SvPOK(*ep))
          {
            free(pattern_list);
            croak("init_tables: pattern[%d] is not a string\n", i);
          }
        // NOTE(review): this stores a pointer into the SV's own buffer, not
        // a copy -- presumably obj->{patterns} must stay unmodified while
        // the tables are in use; confirm against prep_pat().
        pattern_list[i] = (unsigned char *)SvPV(*ep, slen);
      }
    pattern_list[i] = NULL;    // just to be sure (i == count+1 here)
    wm = (struct WuManber *)calloc(1, sizeof(struct WuManber));
    if (!wm)
      {
        free(pattern_list);
        croak("init_tables: out of memory\n");
      }
    wm->progname = "perl(Search::WuManber)";
    prep_pat(wm, n_patterns+1, pattern_list, !case_sensitive);
    // FIXME: this needs a destructor, to free the memory bound in wm's pointers.
    // NOTE(review): newSVpvn() copies *wm, so the calloc'd struct itself is
    // never released. It is deliberately not freed here because it is not
    // visible from this file whether prep_pat() keeps pointers to it or to
    // pattern_list -- confirm before adding free() calls.
    (void)hv_store(obj, "wm", 2, newSVpvn((char *)wm, sizeof(*wm)), 0);
    // Key length is 10: "BLOCK_SIZE" has ten characters.
    (void)hv_store(obj, "BLOCK_SIZE", 10, newSViv(wm->use_bs1?1:(wm->use_bs3?3:2)), 0);
    RETVAL = 1;
  OUTPUT:
    RETVAL

 # find_all(obj, textsv)
 #
 # Searches the string value of textsv using the struct WuManber stored
 # in obj->{wm} by init_tables(). Returns a reference to an array with
 # one [offset, pattern_index] pair per call of the match callback.
 # Croaks if obj->{wm} is missing or does not have the expected size.
SV *
find_all(obj,textsv)
    HV *obj
    SV *textsv
  PREINIT:
    AV *r;    // return value
    STRLEN text_len, n;
    unsigned char *text;
    struct WuManber *wm;
    SV **svp;
  INIT:
    text = (unsigned char *)SvPV(textsv, text_len);
    svp = hv_fetch(obj, "wm", 2, 0);
    if (!svp)
      croak("find_all: no 'wm' in obj\n");
    // wm points directly into the SV's string buffer (no copy is made).
    wm = (struct WuManber *)SvPV(*svp, n);
    // Reject anything that is not exactly one struct WuManber in size.
    if (n != sizeof(struct WuManber))
      croak("find_all: sizeof(wm)=%d, expected %d\n", (int)n, (int)sizeof(struct WuManber));
    search_init(wm, "argv[0]");
    // Mortal, so the array is reclaimed if anything croaks before the
    // reference below takes ownership.
    r = (AV *)sv_2mortal((SV *)newAV());
    wm->cb = push_result;
    wm->cb_data = (void *)r;
  CODE:
    search_text(wm, text, text_len);
    RETVAL = newRV((SV *)r);
  OUTPUT:
    RETVAL