/*
* This file is part of libswish3
* Copyright (C) 2010 Peter Karman
*
* libswish3 is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libswish3 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libswish3; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* all XS stuff is prefixed with 'sp_' for Swish Perl */
#include "xs_boiler.h"
#include "libswish3.h"
#include "macros.h"
#include "xs_helpers.c"
MODULE = SWISH::3 PACKAGE = SWISH::3
PROTOTYPES: enable
void
_setup(CLASS)
char* CLASS;
CODE:
/* Perl has its own setenv/putenv, which can break when libswish3 uses native setenv().
* This magic Perl var seems to fix it, per this thread:
* http://grokbase.com/post/2004/11/11/suse-s-perl-safe-putenf-diff/IQxdco6nM5eY6zUPwmOkuMRH3vo
* Where the compile-time def was used, this var is not available (e.g. Solaris).
*/
#ifndef PERL_USE_SAFE_PUTENV
PL_use_safe_putenv = 1;
#endif /* PERL_USE_SAFE_PUTENV */
swish_setup();
swish_3*
_init(CLASS)
char* CLASS;
PREINIT:
swish_3* s3;
CODE:
s3 = swish_3_init( &sp_handler, NULL );
s3->ref_cnt++;
s3->stash = sp_Stash_new();
sp_Stash_set_char( s3->stash, DATA_CLASS_KEY, DATA_CLASS );
sp_Stash_set_char( s3->stash, CONFIG_CLASS_KEY, CONFIG_CLASS );
sp_Stash_set_char( s3->stash, ANALYZER_CLASS_KEY, ANALYZER_CLASS );
sp_Stash_set_char( s3->stash, PARSER_CLASS_KEY, PARSER_CLASS );
sp_Stash_set_char( s3->stash, SELF_CLASS_KEY, CLASS );
//warn("new() stash refcnt = %d\n", SvREFCNT((SV*)SvRV((SV*)s3->stash)));
//sp_describe_object( (SV*)s3->stash );
s3->analyzer->tokenizer = (&sp_tokenize3);
s3->analyzer->stash = sp_Stash_new();
sp_Stash_set_char( s3->analyzer->stash, SELF_CLASS_KEY, ANALYZER_CLASS );
s3->config->stash = sp_Stash_new();
sp_Stash_set_char( s3->config->stash, SELF_CLASS_KEY, CONFIG_CLASS );
RETVAL = s3;
OUTPUT:
RETVAL
void
_show_sizes(self)
SV* self;
CODE:
warn("sizeof pointer: %ld\n", sizeof(SV*));
warn("sizeof long: %ld\n", sizeof(long));
warn("sizeof int: %ld\n", sizeof(int));
warn("sizeof IV: %ld\n", sizeof(IV));
char*
xml2_version(self)
SV* self;
CODE:
RETVAL = (char*)swish_libxml2_version();
OUTPUT:
RETVAL
char*
version(self)
SV* self;
CODE:
RETVAL = (char*)swish_lib_version();
OUTPUT:
RETVAL
void
wc_report(self, codepoint)
SV *self;
IV codepoint;
CODE:
sp_isw_report(codepoint);
SV*
error(self)
swish_3 *self;
PREINIT:
IV err_code;
SV *msg;
CODE:
msg = newSV(0);
err_code = SvIV( sp_Stash_get(self->stash, "error") );
if (err_code == 0) {
msg = &PL_sv_undef;
}
else {
sv_setpv( msg, swish_err_msg( (int)err_code ) );
}
RETVAL = msg;
OUTPUT:
RETVAL
SV*
slurp(self, filename, ...)
SV* self;
char* filename;
PREINIT:
xmlChar* buf;
struct stat info;
boolean binmode;
off_t buflen;
CODE:
binmode = SWISH_FALSE;
if ( items > 2 ) {
binmode = SvIV(ST(2));
}
if (stat((char *)filename, &info)) {
croak("Can't stat %s: %s\n", filename, strerror(errno));
}
buflen = info.st_size;
if (swish_fs_looks_like_gz( (xmlChar*)filename )) {
//warn("%s looks like gz\n", filename);
buf = swish_io_slurp_gzfile_len((xmlChar*)filename, &buflen, binmode);
}
else {
buf = swish_io_slurp_file_len((xmlChar*)filename, buflen, binmode);
}
RETVAL = newSV(0);
//warn("%s re-using SV with strlen %d\n", filename, (int)buflen);
// buflen+1 because the swish_io* function will nul-terminate
if (buf[buflen] != '\0') {
croak("Buffer was not NUL-terminated (buflen=%d)\n", (int)buflen);
}
sv_usepvn_flags(RETVAL, (char*)buf, buflen+1, SV_SMAGIC | SV_HAS_TRAILING_NUL);
swish_memcount_dec(); // must do manually since Perl will free() it.
OUTPUT:
RETVAL
SV*
get_file_ext(self, filename)
swish_3* self;
SV* filename;
PREINIT:
xmlChar* ext;
char* filename_ptr;
SV* file_ext;
CODE:
file_ext = newSV(0);
filename_ptr = SvPV(filename, PL_na);
ext = swish_fs_get_file_ext( (xmlChar*)filename_ptr );
if (ext == NULL) {
file_ext = &PL_sv_undef;
}
else {
sv_setpv( file_ext, (const char*)ext );
}
RETVAL = file_ext;
OUTPUT:
RETVAL
SV*
get_mime(self, filename)
swish_3* self;
SV* filename;
PREINIT:
xmlChar* ext;
char* filename_ptr;
xmlChar* mime_ptr;
SV* mime;
CODE:
mime = newSV(0);
filename_ptr = SvPV(filename, PL_na);
ext = swish_fs_get_file_ext( (xmlChar*)filename_ptr );
if (ext == NULL) {
mime = &PL_sv_undef;
}
else {
// swish_mime_get_type never returns NULL
// and falls back to default mime
// but we want truth here so handle hash lookup manually.
mime_ptr = swish_hash_fetch(self->config->mimes, ext);
swish_xfree(ext);
if (mime_ptr == NULL) {
mime = &PL_sv_undef;
}
else {
sv_setpv( mime, (const char*)swish_xstrdup( mime_ptr ) );
}
}
RETVAL = mime;
OUTPUT:
RETVAL
SV*
get_real_mime(self, filename)
swish_3* self;
SV* filename;
PREINIT:
xmlChar* ext;
char* filename_ptr;
xmlChar* filename_copy_ptr;
unsigned int filename_len;
xmlChar* mime_ptr;
SV* mime;
CODE:
mime = newSV(0);
filename_ptr = SvPV(filename, PL_na);
ext = swish_fs_get_file_ext( (xmlChar*)filename_ptr );
if (ext == NULL) {
mime = &PL_sv_undef;
}
else if (xmlStrEqual(ext, BAD_CAST "gz")) {
filename_copy_ptr = swish_xstrdup( (xmlChar*)filename_ptr );
filename_len = strlen(filename_ptr);
filename_copy_ptr[filename_len-3] = '\0';
swish_xfree(ext);
ext = swish_fs_get_file_ext(filename_copy_ptr);
swish_xfree(filename_copy_ptr);
if (ext == NULL) {
mime = &PL_sv_undef;
}
else {
mime_ptr = swish_hash_fetch(self->config->mimes, ext);
swish_xfree(ext);
if (mime_ptr == NULL) {
mime = &PL_sv_undef;
}
else {
sv_setpv( mime, (const char*)swish_xstrdup( mime_ptr ) );
}
}
}
else {
mime_ptr = swish_hash_fetch(self->config->mimes, ext);
swish_xfree(ext);
if (mime_ptr == NULL) {
mime = &PL_sv_undef;
}
else {
sv_setpv( mime, (const char*)swish_xstrdup( mime_ptr ) );
}
}
RETVAL = mime;
OUTPUT:
RETVAL
boolean
looks_like_gz(self, filename)
SV* self;
SV* filename;
PREINIT:
char* filename_ptr;
CODE:
filename_ptr = SvPV(filename, PL_na);
RETVAL = swish_fs_looks_like_gz( (xmlChar*)filename_ptr );
OUTPUT:
RETVAL
int
parse_file(self, filename)
swish_3* self;
SV* filename;
PREINIT:
char* file;
int ret;
CODE:
file = SvPV(filename, PL_na);
# need to swap return values to make it Perlish
ret = swish_parse_file( self, (xmlChar*)file );
sp_Stash_set_int( self->stash, "error", ret );
RETVAL = ret ? 0 : 1;
OUTPUT:
RETVAL
int
parse_buffer(self, buffer)
swish_3* self;
SV* buffer;
PREINIT:
char* buf;
int ret;
CODE:
buf = SvPV(buffer, PL_na);
# need to swap return values to make it Perlish
ret = swish_parse_buffer( self, (xmlChar*)buf );
sp_Stash_set_int( self->stash, "error", ret );
RETVAL = ret ? 0 : 1;
OUTPUT:
RETVAL
# TODO
int
parse_fh(self, fh)
swish_3* self;
SV* fh;
CODE:
croak("parse_fh() is not yet implemented.");
RETVAL = 0;
OUTPUT:
RETVAL
# accessors/mutators
void
_set_or_get(self, ...)
swish_3* self;
ALIAS:
set_config = 1
get_config = 2
set_analyzer = 3
get_analyzer = 4
set_parser = 5
get_parser = 6
set_handler = 7
get_handler = 8
set_data_class = 9
get_data_class = 10
set_parser_class = 11
get_parser_class = 12
set_config_class = 13
get_config_class = 14
set_analyzer_class = 15
get_analyzer_class = 16
set_regex = 17
get_regex = 18
_x_placeholder = 19
get_stash = 20
PREINIT:
SV *RETVAL;
char *class;
PPCODE:
{
//warn("number of items %d for ix %d", items, ix);
START_SET_OR_GET_SWITCH
// set_config
case 1: if (!sv_derived_from(ST(1), CONFIG_CLASS)) {
croak("usage: must be a %s object", CONFIG_CLASS);
}
self->config->ref_cnt--;
//warn("set_config ref_cnt of old config = %d", self->config->ref_cnt);
if (self->config->ref_cnt < 1) {
if (SWISH_DEBUG) {
warn("freeing config on set_config");
}
sp_Stash_destroy(self->config->stash);
if (self->config->stash != NULL) {
//SWISH_WARN("set config stash to NULL");
self->config->stash = NULL;
}
swish_config_free(self->config);
}
self->config = (swish_Config*)sp_extract_ptr(ST(1));
self->config->ref_cnt++;
//warn("set_config ref_cnt of new config = %d", self->config->ref_cnt);
break;
// get_config
case 2: if (gimme != G_VOID)
self->config->ref_cnt++;
class = sp_Stash_get_char(self->stash, CONFIG_CLASS_KEY);
sp_Stash_set_char( self->config->stash, SELF_CLASS_KEY, class );
RETVAL = sp_bless_ptr(class, self->config);
break;
// set_analyzer
case 3: if (!sv_derived_from(ST(1), ANALYZER_CLASS)) {
croak("usage: must be a %s object", ANALYZER_CLASS);
}
self->analyzer->ref_cnt--;
//warn("set_analyzer ref_cnt of old analyzer: %d", self->analyzer->ref_cnt);
if (self->analyzer->ref_cnt < 1) {
if (SWISH_DEBUG) {
warn("freeing analyzer on set_analyzer");
}
sp_Stash_destroy(self->analyzer->stash);
swish_analyzer_free(self->analyzer);
}
self->analyzer = (swish_Analyzer*)sp_extract_ptr(ST(1));
self->analyzer->ref_cnt++;
//warn("set_analyzer ref_cnt of new analyzer: %d", self->analyzer->ref_cnt);
break;
// get_analyzer
case 4: if (gimme != G_VOID)
self->analyzer->ref_cnt++;
class = sp_Stash_get_char(self->stash, ANALYZER_CLASS_KEY);
sp_Stash_set_char( self->analyzer->stash, SELF_CLASS_KEY, class );
RETVAL = sp_bless_ptr(class, self->analyzer);
break;
// set_parser
case 5: if (!sv_derived_from(ST(1), PARSER_CLASS)) {
croak("usage: must be a %s object", PARSER_CLASS);
}
self->parser->ref_cnt--;
if (self->parser->ref_cnt < 1) {
if (SWISH_DEBUG) {
warn("freeing parser on set_parser");
}
swish_parser_free(self->parser);
}
self->parser = (swish_Parser*)sp_extract_ptr(ST(1));
self->parser->ref_cnt++;
break;
// get_parser
case 6: if (gimme != G_VOID)
self->parser->ref_cnt++;
class = sp_Stash_get_char(self->stash, PARSER_CLASS_KEY);
RETVAL = sp_bless_ptr(class, self->parser);
break;
// set_handler
case 7: sp_Stash_replace(self->stash, HANDLER_KEY, ST(1));
break;
// get_handler
case 8: RETVAL = sp_Stash_get(self->stash, HANDLER_KEY);
break;
// set_data_class
case 9: sp_Stash_replace(self->stash, DATA_CLASS_KEY, ST(1));
break;
// get_data_class
case 10: RETVAL = sp_Stash_get(self->stash, DATA_CLASS_KEY);
break;
// set_parser_class
case 11: sp_Stash_replace(self->stash, PARSER_CLASS_KEY, ST(1));
break;
// get_parser_class
case 12: RETVAL = sp_Stash_get(self->stash, PARSER_CLASS_KEY);
break;
// set_config_class
case 13: sp_Stash_replace(self->stash, CONFIG_CLASS_KEY, ST(1));
break;
// get_config_class
case 14: RETVAL = sp_Stash_get(self->stash, CONFIG_CLASS_KEY);
break;
// set_analyzer_class
case 15: sp_Stash_replace(self->stash, ANALYZER_CLASS_KEY, ST(1));
break;
// get_analyzer_class
case 16: RETVAL = sp_Stash_get(self->stash, ANALYZER_CLASS_KEY);
break;
// set_regex
case 17: sp_SV_is_qr( ST(1) );
self->analyzer->regex = SvREFCNT_inc( ST(1) );
break;
// get_regex
case 18: RETVAL = self->analyzer->regex;
break;
// get_stash
case 20: RETVAL = self->stash;
break;
END_SET_OR_GET_SWITCH
}
void
DESTROY(self)
SV *self;
PREINIT:
swish_3 *s3;
CODE:
s3 = (swish_3*)sp_extract_ptr(self);
s3->ref_cnt--;
if (SWISH_DEBUG) {
warn("DESTROY %s [0x%lx] [ref_cnt = %d]",
SvPV(ST(0), PL_na), (long)s3, s3->ref_cnt);
}
if (SWISH_DEBUG) {
warn("s3->ref_cnt == %d\n", s3->ref_cnt);
warn("s3->config->ref_cnt == %d\n", s3->config->ref_cnt);
warn("s3->analyzer->ref_cnt == %d\n", s3->analyzer->ref_cnt);
/*
warn("s3->stash refcnt = %d\n",
sp_Stash_inner_refcnt(s3->stash) );
warn("s3->config->stash refcnt = %d\n",
sp_Stash_inner_refcnt( s3->config->stash) );
warn("s3->analyzer->stash refcnt = %d\n",
sp_Stash_inner_refcnt( s3->analyzer->stash) );
*/
}
if (s3->ref_cnt < 1) {
sp_Stash_destroy( s3->stash );
if ( s3->config->ref_cnt == 1 ) {
sp_Stash_destroy( s3->config->stash );
//SWISH_WARN("set config stash to NULL");
s3->config->stash = NULL;
}
if ( s3->analyzer->ref_cnt == 1 ) {
sp_Stash_destroy( s3->analyzer->stash );
s3->analyzer->stash = NULL;
SvREFCNT_dec( s3->analyzer->regex );
s3->analyzer->regex = NULL;
}
swish_3_free( s3 );
}
int
refcount(obj)
SV* obj;
CODE:
RETVAL = SvREFCNT((SV*)SvRV(obj));
OUTPUT:
RETVAL
int
ref_cnt(s3)
swish_3 *s3;
CODE:
RETVAL = s3->ref_cnt;
OUTPUT:
RETVAL
void
describe(self, obj)
SV* self;
SV* obj;
CODE:
sp_describe_object(obj);
void
mem_debug(CLASS)
char* CLASS;
CODE:
swish_mem_debug();
int
debug(CLASS, ...)
char * CLASS;
CODE:
RETVAL = SWISH_DEBUG;
if ( items > 1 ) {
SWISH_DEBUG = SvIV( ST(1) );
warn("SWISH_DEBUG set to %d", SWISH_DEBUG);
}
OUTPUT:
RETVAL
int
get_memcount(CLASS)
char* CLASS;
CODE:
RETVAL = swish_memcount_get();
OUTPUT:
RETVAL
# tokenize() from Perl space uses same C func as tokenizer callback
swish_TokenIterator *
tokenize(self, str, ...)
swish_3* self;
SV* str;
PREINIT:
char* CLASS;
swish_TokenIterator* ti;
swish_MetaName* metaname;
xmlChar* context;
xmlChar* buf;
CODE:
CLASS = TOKENITERATOR_CLASS;
ti = swish_token_iterator_init(self->analyzer);
ti->ref_cnt++;
context = (xmlChar*)SWISH_DEFAULT_METANAME;
buf = (xmlChar*)SvPV(str, PL_na);
// TODO reimplement as hashref arg
// TODO why this check??
if (!SvUTF8(str))
{
if (swish_is_ascii(buf))
SvUTF8_on(str); /* flags original SV ?? */
else
croak("%s is not flagged as a UTF-8 string and is not ASCII", buf);
}
if ( items > 2 )
{
metaname = (swish_MetaName*)sp_extract_ptr(ST(2));
if ( items > 3 )
context = (xmlChar*)SvPV(ST(3), PL_na);
//warn ("metaname %s context %s\n", metaname, context );
}
else {
metaname = swish_metaname_init(swish_xstrdup((xmlChar*)SWISH_DEFAULT_METANAME));
}
sp_tokenize3( ti, buf, metaname, context );
RETVAL = ti;
OUTPUT:
RETVAL
# native libswish3 tokenizer
swish_TokenIterator *
tokenize_native(self, str, ...)
swish_3* self;
SV* str;
PREINIT:
char* CLASS;
swish_TokenIterator* ti;
swish_MetaName* metaname;
xmlChar* context;
xmlChar* buf;
CODE:
CLASS = TOKENITERATOR_CLASS;
ti = swish_token_iterator_init(self->analyzer);
ti->ref_cnt++;
context = (xmlChar*)SWISH_DEFAULT_METANAME;
buf = (xmlChar*)SvPV(str, PL_na);
// TODO reimplement as hashref arg
// TODO why this check??
if (!SvUTF8(str))
{
if (swish_is_ascii(buf))
SvUTF8_on(str); /* flags original SV ?? */
else
croak("%s is not flagged as a UTF-8 string and is not ASCII", buf);
}
if ( items > 2 )
{
metaname = (swish_MetaName*)sp_extract_ptr(ST(2));
if ( items > 3 )
context = (xmlChar*)SvPV(ST(3), PL_na);
//warn ("metaname %s context %s\n", metaname, context );
}
else {
metaname = swish_metaname_init((xmlChar*)SWISH_DEFAULT_METANAME);
metaname->ref_cnt++;
}
swish_tokenize( ti, buf, metaname, context );
RETVAL = ti;
OUTPUT:
RETVAL
# include the other .xs files
INCLUDE: XS/Constants.xs
INCLUDE: XS/Config.xs
INCLUDE: XS/Analyzer.xs
INCLUDE: XS/Doc.xs
INCLUDE: XS/Data.xs
INCLUDE: XS/Stash.xs
INCLUDE: XS/Property.xs
INCLUDE: XS/MetaName.xs
INCLUDE: XS/PropertyHash.xs
INCLUDE: XS/MetaNameHash.xs
INCLUDE: XS/xml2Hash.xs
INCLUDE: XS/Token.xs
INCLUDE: XS/TokenIterator.xs