The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
package Thesaurus::DBI;

use strict;

use vars qw[$VERSION];

$VERSION = '0.01';

use base 'Thesaurus';
use DBI;

use Params::Validate qw( validate SCALAR BOOLEAN OBJECT );

# database structure (only needed to create db)
my $DB_TABLES = << "END" ;
CREATE TABLE assignments (
  word1 bigint(20) unsigned NOT NULL default '0',
  word2 bigint(20) unsigned NOT NULL default '0',
  UNIQUE KEY word1 (word1,word2),
  UNIQUE KEY word2 (word2,word1)
);

CREATE TABLE word (
  ID bigint(20) unsigned NOT NULL AUTO_INCREMENT,
  word varchar(150) NOT NULL default '',
  wordindex varchar(150) default NULL,
  PRIMARY KEY  (ID),
  UNIQUE KEY word (word),
  KEY wordindex (wordindex)
) AUTO_INCREMENT=1 ;

END

#########################
# initialize dbconnection
# overwritten
# param: db-connection or db-access data
# return:
# added: jseibert, 30.05.2006
#########################
sub _init {
    my $self = shift;
    # check parameters
    my %p = validate( @_,
                      { dbhandle => { type => OBJECT, optional => 1, isa => [ qw( DBI ) ] },
                      	dbtype =>	{type=>SCALAR, optional => 1, default=>'dbi:mysql'},
                        dbname =>	{type=>SCALAR},
                        dbhost  => { type => SCALAR, optional => 1, default => 'localhost' },
                        dbuser     => { type => SCALAR, optional => 1, default=>'' },
                        dbpassword     => { type => SCALAR, optional => 1, default=>'' },
                        
                      },
                    );
                    
  	# use existing database connection
	if ($p{dbhandle}) {
		$self->{db} = $p{dbhandle};

	# open database connection
	} else {
		my $dsn = "$p{dbtype}:host=$p{dbhost};database=$p{dbname}";
		my $dbh = DBI->connect($dsn, $p{dbuser}, $p{dbpassword}, {'PrintError' => '1',	'RaiseError' => '0'} );
	 	if (!$dbh) {
	 		die "db_connect:" . DBI::errstr();
			return undef;
	 	}
	 	$self->{db}   = $dbh;
	}
	
	$self->{params} = \%p;
	return 1;
}


#########################
# save synonyms
# param: list_of_synonyms: ARRAYREF_OF_String
# return: Boolean
# added: jseibert, 01.06.2006
#########################
sub _add_list {
    my $self = shift;
    my $list = shift;

	# create / get id's for every string
	my @ids = map {$self->_save_word($_)} @$list;
	
	# save assignments
	$self->_save_assignment_list(\@ids);
}

#########################
# Search entries in Thesaurus
# in mysql: everything ist case-insensitive
# param: list_of_synonyms: ARRAY_OF_String
# return: HASHREF
# added: jseibert, 01.06.2006
#########################
sub _find {
    my $self = shift;
	
	# hash for results
    my %lists;
    
    # process all parameter and query database
    foreach my $key (@_) {
        my $search_key = $self->{params}{ignore_case} ? lc $key : $key;
        # ignore duplicates
        next if $lists{$key};
		
		my $words = $self->_find_in_db($key);
		
		foreach my $w (@$words) {
			push( @{ $lists{$key} }, $w ) ;
		}
    }
    return \%lists;
}

#########################
# delete synonym
# delete word and all corresponding assignments
# param: word: ARRAY_OF_String
# return: Boolean
# added: jseibert, 30.05.2006
#########################
sub delete {
    my $self = shift;
	my @list = @_;
	
	# map words to (existing) id's
	my @ids = map {$self->_find_word($_)} @list;
	
	# delete all words
	for (my $i=0; $i<@ids; $i++) {
		my $id = $ids[$i];
		next if (!$id);
		$self->_delete_word($id);
	}
}

#########################
# Create database-tables for a new thesaurus
# param: 
# return: Boolean
# added: jseibert, 30.05.2006
#########################
sub create_tables {
	my $self = shift;
	my @queries = split(';', $DB_TABLES);
	for (@queries) {
		# ingore empty lines / queries
		next if ($_ =~ /^\s*$/);
		$self->_db_do($_);
	}
	return;
}


######################### Internal helper methods #######################

#########################
# search for an existing keyword in database. Create new one if none found.
# param: word: String
# return: ID: INT
# added: jseibert, 30.05.2006
#########################
sub _save_word {
	my $self = shift;
	my $word = shift;
	my $id = $self->_find_word($word);
	# existing word?
	if ($id) {
		return $id;

	# create new
	} else {
		# create index value: (additional information in brackets will be removed)
		my $key = $word;
		$key =~ s/\s*\(.*?\)//gi;
		$key =~ s/\s*$//gi;
		$key =~ s/^\s*//gi;
		
		$self->_db_do("INSERT INTO word SET word = ?, wordindex = ? ", $word, $key);
		my $id = $self->_db_singlevalue("SELECT LAST_INSERT_ID() FROM word LIMIT 1");
		return $id;
	}
}

#########################
# Save assignments between words
# param: ids: ARRAYREF_OF_INT
# return: Boolean
# added: jseibert, 30.05.2006
#########################
sub _save_assignment_list {
	my $self = shift;
	my $ids = shift;
	# assign every word(id) with all others
	for (my $i=0; $i<@$ids; $i++) {
		my $id1 = $ids->[$i];
		for (my $j=$i+1; $j < @$ids; $j++) {
			my $id2 = $ids->[$j];
			$self->_save_assignment($id1, $id2);
		}
	}
	return 1;
}

#########################
# helper method: save a single word assignment
# param: id1: INT, id2: INT
# return: Boolean
# added: jseibert, 30.05.2006
#########################
sub _save_assignment {
	my $self = shift;
	my $id1 = shift;
	my $id2 = shift;
	return if (!$id1 || !$id2);
	
	if (!$self->_find_assignment($id1, $id2)) {
		$self->_db_do('INSERT INTO assignments SET word1 = ?, word2 = ?', $id1, $id2);
	}
	
	return 1;
}

#########################
# helper-method: serach for an existing assignment
# param: id1: INT, id2: INT
# return: Boolean
# added: jseibert, 30.05.2006
#########################
sub _find_assignment {
	my $self = shift;
	my $id1 = shift;
	my $id2 = shift;
	
	my $sql = 'SELECT 1 FROM assignments WHERE (word1 = ? AND word2 = ?) OR (word1 = ? AND word2 = ?) LIMIT 1';
	my $found = $self->_db_singlevalue($sql, $id1, $id2, $id2, $id1);
	return $found || 0;
}

#########################
# delete a word and all assignments to others
# param: word_id: INT
# return: Boolean
# added: jseibert, 30.05.2006
#########################
sub _delete_word {
	my $self = shift;
	my $id = shift;
	
	my $sql = 'DELETE FROM word WHERE ID = ?';
	$self->_db_do($sql, $id);
	$sql = 'DELETE FROM assignments WHERE (word1 = ? OR word2 = ?)';
	return $self->_db_do($sql, $id, $id);
}



#########################
# helper-method: search all synonyms of a given word
# param: word: String
# return: ARRAYREF_OF_string
# added: jseibert, 30.05.2006
#########################
sub _find_in_db {
	my $self = shift;
	my $key = shift;
	
	# find list of all aliases for the given word
	my $sql = "SELECT IF (word.ID = w1.ID, w2.word, w1.word) AS alias, word.word AS word FROM word 
INNER JOIN `assignments` ON ( assignments.word1 = word.ID
OR assignments.word2 = word.ID )
INNER JOIN word AS w1 ON assignments.word1 = w1.ID
INNER JOIN word AS w2 ON assignments.word2 = w2.ID
WHERE word.wordindex = ?";
	
	my $sth = $self->_db_query($sql, $key);
	
	my @words = ();
	# fetch every single alias
	while (my ($word) = $sth->fetchrow_array()) {
		push(@words, $word);
	}
	# synonyms found? add the given word in the result list
	if (@words) {
		unshift(@words, $key);
	}
	return \@words;
}

#########################
# search for the given word in the database and return it's ID
# param: word: String
# return: ID: INT
# added: jseibert, 01.06.2006
#########################
sub _find_word {
	my $self = shift;
	my $word = shift;
	return $self->_db_singlevalue('SELECT ID FROM word WHERE word = ?', $word);
}

#########################
# execute db-query, with error detection
# param: query: String, [params: ARRAY_OF_String]
# return: statement: DBI::st
# added: jseibert, 30.05.2006
#########################
sub _db_query {
	my $self = shift;
	my $sql = shift;
	my @data = @_;
	
	# prepare sql query
	my $sth = $self->{'db'}->prepare($sql);
	if (!$sth) {
		my $error = $self->{'db'}->errstr();
		die "db_prepare: $error . on query: $sql";
	}
	
	# execute query
	my $result = $sth->execute(@data);
	if (not defined $result) {
		my $error = $self->{'db'}->errstr();
		die "db_execute: $error . on query: $sql";
	}
	# return statement handler
	return $sth;
}

#########################
# get a single value (one row) from db
# param: sql: String, data: ARRAY_OF_String
# return: String | ARRAY_OF_String
# added: jseibert, 30.05.2006
#########################
sub _db_singlevalue {
	my $self = shift;
	my $sql = shift;
	my $sth = $self->_db_query($sql, @_);
	
	my @values = $sth->fetchrow_array();
	# return eather one value or the whole row
	return (wantarray) ? @values : $values[0]; 
}

#########################
# send insert/update/delete (no result data)
# param: sql: String
# return: Boolean
# added: jseibert, 30.05.2006
#########################
sub _db_do {
	my $self = shift;
	my $sql = shift;
	my $sth = $self->_db_query($sql, @_);
	return 1;
}

1;
__END__

=head1 NAME

Thesaurus::DBI - Store and query synonyms (Thesaurus) in an SQL database.

=head1 SYNOPSIS

	use Thesaurus::DBI;
	
	# create new database connection
	my $th = new Thesaurus::DBI(dbhost=> 'localhost', dbname=>'thesaurus',dbuser=>'user',dbpassword=>'pass');
	
	# use existing database connection
	my $th = new Thesaurus::DBI(dbhandle => $dbi, dbname=>'thesaurus',dbuser=>'user',dbpassword=>'pass');
	
	# initialize database
	$th->create_tables();
	
	# query thesaurus
	my @synonyms = $th->find('synonym');
	
	# add synonyms
	$th->add(['word', 'synonym']);
	
	# delete word
	$th->delete('word');


=head1 DESCRIPTION

This subclass of C<Thesaurus> implements persistence by using an SQL database.

This module requires the C<DBI> module from CPAN. To use it with certain database servers, 
the corresponding database drivers are needed, too.
(Mysql -> DBD::mysql)

Please note, that database servers like MySQL doesn't take care of case-sensitivity.
So the queries to the thesaurus-database wil all bei case-insensitive.

=head1 METHODS

=over 4

=item * new

This subclass's C<new> method takes the following parameters, in
addition to those accepted by its parent class:

=over 8

=item * dbhost => 'localhost'

Host of the database server. Default value: localhost

=item * dbname => 'thesaurus'

Name of the database to connect to.

=item * dbuser => 'user'

Username for the database connection

=item * dbpassword => 'pass'

Password for the database connection

=item * dbhandle => $dbi

If you already have an existing connection to the database where the thesaurus tables are found in (word, assignment),
you can pass it in by using this parameter.

=back

=item * create_tables

Method to initialize the database to store synonyms in. Creates two new database tables to store all words 
and the corresponding assignments.

=back

=head1 SEE ALSO

Thesaurus, DBI, DBD::mysql

=head1 SYNONYM SOURCES

Listed below are some links for synonym databases, that can be used with this module

=item * English

http://www.thesaurus.com/
http://wordnet.princeton.edu/perl/webwn/

=item * Deutsch

http://www.openthesaurus.de

=item * Polish

http://synonimy.ux.pl/

=item * Espanol

http://openoffice-es.sourceforge.net/thesaurus/

=item * Slovenska

http://www.openthesaurus.tk/

=head1 AUTHOR

Jo Seibert, jseibert (at) seibert-media (dot) net

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2006 by Jo Seibert

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.5 or,
at your option, any later version of Perl 5 you may have available.

=cut