# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl test.pl'

#########################

# Five assertions are planned below; keep this count in sync when tests are
# added or removed.
# NOTE(review): `use strict; use warnings;` is deliberately not enabled here,
# because the pragmas would also apply to the helper subs later in this file;
# turn them on once those helpers are confirmed to be strict-clean.
use Test::More tests => 5;
use Lingua::EN::Tagger;

# The `use` above dies at compile time if the module cannot be loaded, so
# reaching this statement is itself the proof that it compiled.
pass('module compiled');

#########################

######################################
# Start by creating the parser object
######################################
# Every test below runs against a tagger constructed with `relax => 1`.
my $parser = Lingua::EN::Tagger->new( relax => 1 );
ok( $parser, 'relax the parser' );

###############################################
# Compare the parser to the Penn Treebank
###############################################
# compute_accuracy() and penn_benchmark() are helpers defined outside this
# section; the result is compared against a minimum score of 95.
my $tagged   = $parser->get_readable( penn() );
my $accuracy = compute_accuracy( $tagged, penn_benchmark() );
cmp_ok( $accuracy, '>=', 95, "relaxed: overall accuracy ($accuracy%)" );

##########################################################################
# Try to tag some hyphenated words that mostly don't occur in the lexicon
##########################################################################
# This sample must match its benchmark exactly (score of 100).
$tagged   = $parser->get_readable( hyphen() );
$accuracy = compute_accuracy( $tagged, hyphen_benchmark() );
cmp_ok( $accuracy, "==", 100, "relaxed: hyphenated words ($accuracy%)" );

########################################################
# Tag some words that mostly don't occur in the lexicon
########################################################
# Unknown words only need to reach a score of 80.
$tagged   = $parser->get_readable( jibberish() );
$accuracy = compute_accuracy( $tagged, jibberish_benchmark() );
cmp_ok( $accuracy, '>=', 80, "relaxed: unknown word accuracy ($accuracy%)" );

###############################################
# Words that mostly don't occur in the lexicon
###############################################

# Returns the untagged sample sentence built from rare words.
# The line break after "from " is part of the fixture text, so it is kept as
# an explicit "\n" rather than being folded into a space.
sub jibberish {
    return "Nils occludes the 5 corybantic sciolists from \n"
         . "fressing upon the northeast-oriented perambulations of the yabbering doyenne";
}

# Returns the expected word/TAG rendering of the jibberish() sentence.
sub jibberish_benchmark {
    return "Nils/NNP occludes/VBZ the/DET 5/CD corybantic/JJ sciolists/NNS from/IN fressing/VBG upon/IN the/DET northeast-oriented/JJ perambulations/NNS of/IN the/DET yabbering/VBG doyenne/NN";
}

##########################################################
# Hyphenated words that mostly don't occur in the lexicon
##########################################################

# Returns the untagged sample text containing hyphenated words.
sub hyphen {
    # brother-in-law not in lexicon, sister-in-law is
    return "The brother-in-law. The sister-in-law. A strategy of tit-for-tat among middle-eastern states.";
}

# Returns the expected word/TAG rendering of the hyphen() text.
sub hyphen_benchmark {
    return "The/DET brother-in-law/NN ./PP The/DET sister-in-law/NN ./PP A/DET strategy/NN of/IN tit-for-tat/NN among/IN middle-eastern/JJ states/NNS ./PP";
}

####################################################
# Test the tagger against an actual tagged corpus
####################################################
sub penn { return <