package AI::Categorizer::Learner::Rocchio;

# Rocchio-style learner layered on AI::Categorizer::Learner::Boolean.
#
# For every category a single "profile" feature vector is built from the
# training documents; a new document is scored by the dot product of its
# normalized feature vector with that profile.

use strict;
use warnings;

our $VERSION = '0.01';

use Params::Validate qw(:types);
use AI::Categorizer::FeatureVector;
use AI::Categorizer::Learner::Boolean;
use base qw(AI::Categorizer::Learner::Boolean);

# Constructor parameters:
#   positive_setting - weight (beta) applied to a category's own features.
#   negative_setting - weight (gamma) applied against the remaining features.
#   threshold        - not read in this file; presumably consumed by the
#                      Boolean parent class when turning a score into a
#                      yes/no decision -- TODO confirm.
__PACKAGE__->valid_params(
    positive_setting => { type => SCALAR, default => 16  },
    negative_setting => { type => SCALAR, default => 4   },
    threshold        => { type => SCALAR, default => 0.1 },
);

# Normalizes every training document, caches the knowledge set's combined
# feature vector in $self->{model}{all_features}, delegates the actual
# training to the parent class, and finally drops the knowledge set from the
# learner.
#
# Any arguments are passed through unchanged to SUPER::create_model.
# The value of the trailing delete (the removed knowledge set) is what the
# method evaluates to, exactly as before.
sub create_model {
    my $self = shift;

    # normalize is called for its side effect: the document vectors are
    # modified in place before any profile is derived from them.
    for my $doc ($self->knowledge_set->documents) {
        $doc->features->normalize;
    }

    $self->{model}{all_features} = $self->knowledge_set->features(undef);
    $self->SUPER::create_model(@_);

    delete $self->{knowledge_set};
}

# Builds the profile vector for one category.
#
#   $positives - array ref of documents inside the category (only counted).
#   $negatives - array ref of documents outside the category (only counted).
#   $cat       - category object; $cat->features(undef) supplies the
#                category's feature vector (presumably the sum over its
#                documents -- verify against the Category class).
#
# With P = number of positives and N = number of negatives the result is
#
#     normalize( -(gamma/N) * all_features + (beta/P + gamma/N) * cat_features )
#
# which, if all_features is the sum over every document, equals
# (beta/P) * positives - (gamma/N) * negatives.
#
# An empty side contributes nothing: its per-document weight is taken as 0
# rather than dividing by zero, so a category that covers every training
# document (N == 0) or none of them (P == 0) no longer aborts training.
#
# Returns whatever normalize returns for the profile vector.
sub create_boolean_model {
    my ($self, $positives, $negatives, $cat) = @_;

    my $posdocnum = @$positives;
    my $negdocnum = @$negatives;

    my $beta  = $self->{positive_setting};
    my $gamma = $self->{negative_setting};

    # Guarded per-document weights; identical to the plain quotients
    # whenever the corresponding document count is non-zero.
    my $pos_weight = $posdocnum ? $beta  / $posdocnum : 0;
    my $neg_weight = $negdocnum ? $gamma / $negdocnum : 0;

    # Work on clones so the cached all_features vector and the category's
    # own vector are left untouched.
    my $profile = $self->{model}{all_features}->clone->scale(-$neg_weight);
    my $cat_part = $cat->features(undef)->clone->scale($pos_weight + $neg_weight);
    $profile->add($cat_part);

    return $profile->normalize;
}

# Scores a document against a category profile.
#
#   $newdoc  - document object providing ->features.
#   $profile - profile vector produced by create_boolean_model.
#
# Returns the dot product of the document's normalized feature vector with
# the profile. NOTE(review): normalize appears to work in place (see
# create_model), so the document's vector is likely modified -- confirm.
sub get_boolean_score {
    my ($self, $newdoc, $profile) = @_;

    return $newdoc->features->normalize->dot($profile);
}

1;