# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl test.pl'

#########################

# change 'tests => 1' to 'tests => last_test_to_print';

use Test::More tests => 12;
use Lingua::EN::Tagger;

# The `use` above aborts compilation if the module cannot be loaded, so
# simply reaching this statement means it compiled.
pass('module compiled');

#########################

# Insert your test code below, the Test module is use()ed here so read
# its man page ( perldoc Test ) for help writing this test script.

######################################
# Start by creating the parser object
######################################
# $parser is deliberately left as a package variable: the tests further
# down in this file keep using the same object.
$parser = Lingua::EN::Tagger->new( stem => 1 );
ok( $parser, 'creating parser object' );

########################
# Check the stemmer
########################
is( scalar( $parser->stem('realize') ), 'realiz', 'stemming word' );

########################
# Does the parser work
########################
# Every add_tags() call below is forced into scalar context.  Called in
# the list context of an argument list, a method that returns an empty
# list would shift the remaining arguments: ok() would then test its own
# description (always true) and is() would compare against the description.

# Make sure that it doesn't die when parsing a non-text sample
ok(
    scalar(
        $parser->add_tags(
            "#!/usr/bin/perl -w\nuse strict;\nmy \$var = 'hello world'; print \$var | 'no value'; "
        )
    ),
    "non-text sample"
);

# Missing and empty input must both come back as undef.
is( scalar( $parser->add_tags() ),     undef, 'NULL string' );
is( scalar( $parser->add_tags('') ),   undef, 'Empty string' );

# How about real text?
# Both tagging entry points must return a true value for the penn() sample.
ok( $parser->add_tags( penn() ), 'add_tags() method' );
$tagged = $parser->get_readable( penn() );
ok( $tagged, 'get_readable() method' );

########################################
# Check the accuracy of the parser
# with the get_readable() method
########################################
$accuracy = compute_accuracy( $tagged, penn_benchmark() );
ok( $accuracy, 'computing accuracy' );
cmp_ok( $accuracy, '>=', 95, "overall accuracy ($accuracy%)" );

##########################
# Check hyphenated words
##########################
$tagged   = $parser->get_readable( hyphen() );
$accuracy = compute_accuracy( $tagged, hyphen_benchmark() );
cmp_ok( $accuracy, '==', 100, "hyphenated words ($accuracy%)" );

###################################
# Check a string of words that are
# largely unknown to the lexicon
###################################
$tagged   = $parser->get_readable( jibberish() );
$accuracy = compute_accuracy( $tagged, jibberish_benchmark() );
cmp_ok( $accuracy, '>=', 80, "unknown word accuracy ($accuracy%)" );

###############################################
# Words that mostly don't occur in the lexicon
###############################################

# Untagged input sentence for the unknown-word test.
sub jibberish {
    my $sentence =
        'Nils occludes the 5 corybantic sciolists from fressing upon the northeast-oriented perambulations of the yabbering doyenne';
    return $sentence;
}

# Expected word/TAG rendering of the jibberish() sentence.
sub jibberish_benchmark {
    my $expected =
        'Nils/NNP occludes/VBZ the/DET 5/CD corybantic/JJ sciolists/NNS from/IN fressing/VBG upon/IN the/DET northeast-oriented/JJ perambulations/NNS of/IN the/DET yabbering/VBG doyenne/NN';
    return $expected;
}

##########################################################
# Hyphenated words that mostly don't occur in the lexicon
##########################################################

# Untagged input for the hyphenated-word test.
sub hyphen {
    # brother-in-law not in lexicon, sister-in-law is
    # The text contains a line break after the second sentence; it is
    # spelled as \n here so the trailing space before it stays visible.
    my $text =
        "The brother-in-law. The sister-in-law. \nA strategy of tit-for-tat among middle-eastern states.";
    return $text;
}

# Expected word/TAG rendering of the hyphen() text.
sub hyphen_benchmark {
    my $expected =
        'The/DET brother-in-law/NN ./PP The/DET sister-in-law/NN ./PP A/DET strategy/NN of/IN tit-for-tat/NN among/IN middle-eastern/JJ states/NNS ./PP';
    return $expected;
}

####################################################
# Test the tagger against an actual tagged corpus
####################################################
sub penn { return <