# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl test.pl'

#########################

# The plan below must match the number of assertions in this script;
# change 'tests => 8' whenever a test is added or removed.
use Test::More tests => 8;
use Lingua::EN::Tagger;

# The `use' above dies at compile time if the module cannot be loaded, so
# reaching this statement is itself the evidence that the module compiled.
pass('module compiled');

#########################

# The tests proper start here. See `perldoc Test::More' for the assertion
# functions used below.

######################################
# Start by creating the parser object
######################################
my $parser = Lingua::EN::Tagger->new();
ok( $parser, 'creating parser object' );

################################
# Clear the lexicon and
# reload it from the yaml files
# Don't try this at home!
################################
# Both hashes are addressed by their fully qualified package names. A typo
# in the package name does not raise an error: Perl silently creates and
# clears an unrelated hash, leaving the real one untouched.
# NOTE(review): assumes _HMM and _LEXICON are package variables of
# Lingua::EN::Tagger -- confirm against the module source.
%Lingua::EN::Tagger::_HMM     = ();
%Lingua::EN::Tagger::_LEXICON = ();
ok( $parser->_load_tags('tags.yml'),     're-load tags' );
ok( $parser->_load_words('words.yml'),   're-load words' );
ok( $parser->_load_words('unknown.yml'), 're-load unknown words' );

########################################
# Check the accuracy of the parser
########################################
# compute_accuracy() is defined elsewhere in this file; its result is
# compared against percentage thresholds below.
my $tagged   = $parser->get_readable( penn() );
my $accuracy = compute_accuracy( $tagged, penn_benchmark() );
cmp_ok( $accuracy, '>=', 95, "overall accuracy ($accuracy%)" );

##########################
# Check hyphenated words
##########################
$tagged   = $parser->get_readable( hyphen() );
$accuracy = compute_accuracy( $tagged, hyphen_benchmark() );
cmp_ok( $accuracy, '==', 100, "hyphenated words ($accuracy%)" );

###################################
# Check a string of words that are
# largely unknown to the lexicon
###################################
$tagged   = $parser->get_readable( jibberish() );
$accuracy = compute_accuracy( $tagged, jibberish_benchmark() );
cmp_ok( $accuracy, '>=', 80, "unknown word accuracy ($accuracy%)" );

###############################################
# Words that mostly don't occur in the lexicon
###############################################

# Untagged input sentence for the unknown-word check.
sub jibberish {
    my $sentence =
        'Nils occludes the 5 corybantic sciolists from fressing upon the northeast-oriented perambulations of the yabbering doyenne';
    return $sentence;
}

# The same sentence as jibberish(), with every token written as word/TAG.
sub jibberish_benchmark {
    my $expected =
        'Nils/NNP occludes/VBZ the/DET 5/CD corybantic/JJ sciolists/NNS from/IN fressing/VBG upon/IN the/DET northeast-oriented/JJ perambulations/NNS of/IN the/DET yabbering/VBG doyenne/NN';
    return $expected;
}

##########################################################
# Hyphenated words that mostly don't occur in the lexicon
##########################################################

# Untagged input text for the hyphenated-word check.
sub hyphen {
    # brother-in-law not in lexicon, sister-in-law is
    my $text =
        'The brother-in-law. The sister-in-law. A strategy of tit-for-tat among middle-eastern states.';
    return $text;
}

# The same text as hyphen(), with every token (including the sentence-final
# periods) written as word/TAG.
sub hyphen_benchmark {
    my $expected =
        'The/DET brother-in-law/NN ./PP The/DET sister-in-law/NN ./PP A/DET strategy/NN of/IN tit-for-tat/NN among/IN middle-eastern/JJ states/NNS ./PP';
    return $expected;
}

####################################################
# Test the tagger against an actual tagged corpus
####################################################
sub penn { return <