use strict;
use Test;
use AI::Categorizer;
use AI::Categorizer::KnowledgeSet;
use AI::Categorizer::Collection::InMemory;

# Shared helpers for the learner test scripts.  A test script is expected to
# plan num_standard_tests() (or num_setup_tests()) assertions and then call
# perform_standard_tests() (or set_up_tests()) with the categorizer
# parameters it wants to exercise.

# have_module($module)
#
# Tries to load $module with "use" and returns 1 on success, or a false
# value when the load fails.  A string eval is used because the module name
# is only known at run time; only pass trusted, literal module names.
sub have_module {
    my $module = shift;
    return eval "use $module; 1";
}

# need_module($module)
#
# Skips the whole test script (via skip_test) unless $module can be loaded.
sub need_module {
    my $module = shift;
    skip_test("$module not installed") unless have_module($module);
}

# skip_test($msg)
#
# Prints a "1..0 # Skipped" plan line carrying the optional message $msg and
# exits the process immediately.
sub skip_test {
    my $msg = @_ ? shift() : '';
    print "1..0 # Skipped: $msg\n";
    exit;
}

# training_docs()
#
# Returns the training corpus as a flat name => { categories, content }
# list: two 'farming' documents and two 'vampire' documents.
sub training_docs {
    return (
        doc1 => {
            categories => ['farming'],
            content    => 'Sheep are very valuable in farming.',
        },
        doc2 => {
            categories => ['farming'],
            content    => 'Farming requires many kinds of animals.',
        },
        doc3 => {
            categories => ['vampire'],
            content    => 'Vampires drink blood and vampires may be staked.',
        },
        doc4 => {
            categories => ['vampire'],
            content    => 'Vampires cannot see their images in mirrors.',
        },
    );
}

# run_test_docs($learner)
#
# Categorizes two hand-written documents with the trained $learner and
# checks that each one lands in the expected category.  Emits exactly 7
# assertions (4 for the farming document, 3 for the vampire document).
sub run_test_docs {
    my $learner = shift;

    my $doc = AI::Categorizer::Document->new(
        name    => 'test1',
        content => 'I would like to begin farming sheep.',
    );
    my $r = $learner->categorize($doc);

    print "Categories: ", join(', ', $r->categories), "\n";
    ok($r->best_category, 'farming', "Best category is 'farming'");
    ok $r->in_category('farming'), 1,
        sprintf("threshold = %s, score = %s", $r->threshold, $r->scores('farming'));
    ok $r->in_category('vampire'), '',
        sprintf("threshold = %s, score = %s", $r->threshold, $r->scores('vampire'));
    ok $r->all_categories, 2, "Should be 2 categories in total";

    $doc = AI::Categorizer::Document->new(
        name    => 'test2',
        content => "I see that many vampires may have eaten my beautiful daughter's blood.",
    );
    $r = $learner->categorize($doc);

    print "Categories: ", join(', ', $r->categories), "\n";
    ok($r->best_category, 'vampire', "Best category is 'vampire'");
    ok $r->in_category('farming'), '',
        sprintf("threshold = %s, score = %s", $r->threshold, $r->scores('farming'));
    ok $r->in_category('vampire'), 1,
        sprintf("threshold = %s, score = %s", $r->threshold, $r->scores('vampire'));
}

# set_up_tests(%params)
#
# Builds an AI::Categorizer around a fresh knowledge set, loads the
# training_docs() corpus into it, and trains the learner.  %params is passed
# through to the AI::Categorizer constructor after the defaults, so it can
# override them; when it contains learner_class, the class of the created
# learner is checked against it.
#
# Emits exactly num_setup_tests() (3) assertions.
# Returns ($learner, \%docs).
sub set_up_tests {
    my %params = @_;

    my $c = AI::Categorizer->new(
        knowledge_set => AI::Categorizer::KnowledgeSet->new(
            name      => 'Vampires/Farmers',
            stopwords => [qw(are be in of and)],
        ),
        verbose => $ENV{TEST_VERBOSE} ? 1 : 0,
        %params,
    );
    ok ref($c), 'AI::Categorizer', "Create an AI::Categorizer object";

    my %docs = training_docs();
    while (my ($name, $data) = each %docs) {
        $c->knowledge_set->make_document(name => $name, %$data);
    }

    my $learner = $c->learner;
    ok $learner;

    if ($params{learner_class}) {
        ok ref($learner), $params{learner_class},
            "Make sure the correct Learner class is instantiated";
    }
    else {
        # Keeps the assertion count constant whether or not a class was given.
        ok 1, 1, "Dummy test";
    }

    $learner->train;
    return ($learner, \%docs);
}

# perform_standard_tests(%params)
#
# Runs the full standard suite for one learner configuration: setup, the
# categorization checks, a save/restore round-trip followed by the same
# checks again, and a precision check over the training collection.
# %params is forwarded unchanged to set_up_tests().
#
# Emits exactly num_standard_tests() assertions.
sub perform_standard_tests {
    my ($learner, $docs) = set_up_tests(@_);

    run_test_docs($learner);

    # Make sure we can save state & restore state
    $learner->save_state('t/state');
    $learner = $learner->restore_state('t/state');
    ok $learner;
    run_test_docs($learner);

    my $train_collection = AI::Categorizer::Collection::InMemory->new(data => $docs);
    ok $train_collection;

    my $h = $learner->categorize_collection(collection => $train_collection);
    ok $h->micro_precision > 0.5;
}

# Number of assertions emitted by set_up_tests().  The empty prototype is
# kept so existing callers can keep using the name as a bareword constant.
sub num_setup_tests () { 3 }

# Number of assertions emitted by perform_standard_tests(): the setup
# assertions plus 17 of its own (2 x 7 from run_test_docs, plus 3 direct
# ok() calls).
sub num_standard_tests () { num_setup_tests() + 17 }

1;