use strict;
use warnings;

# Tests for Text::Ngram::ngram_counts and Text::Ngram::add_to_counts.
#
# Every assertion compares the returned hash of ngram => count against a
# hand-written expectation. The plan is fixed at 15 so that an early exit
# is reported as a failure rather than passing silently.
use Test::More tests => 15;

use_ok("Text::Ngram");

# --- Basic trigram counting -------------------------------------------------

# The expectation below contains no digits: the run "1235678" only shows up
# as the break in 'fg ' and ' hi'.
my $text = "abcdefg1235678hijklmnop";
my $hash = Text::Ngram::ngram_counts($text, 3);
is_deeply(
    $hash,
    {
        'abc' => 1,
        'bcd' => 1,
        'cde' => 1,
        'def' => 1,
        'efg' => 1,
        'fg ' => 1,
        ' hi' => 1,
        'hij' => 1,
        'ijk' => 1,
        'jkl' => 1,
        'klm' => 1,
        'lmn' => 1,
        'mno' => 1,
        'nop' => 1,
    },
    "Simple test finds all ngrams"
);

# --- Incremental counting into an existing hash -----------------------------

Text::Ngram::add_to_counts("abc", 3, $hash);
is($hash->{abc}, 2, "Simple incremental adding works");
is($hash->{bcd}, 1, "Without messing everything else up");

# An undef window size must be inferred from the hash that is passed in.
Text::Ngram::add_to_counts("abc", undef, $hash);
is($hash->{abc}, 3, "We can guess the window size");

# --- Option handling on a sentence with punctuation -------------------------

my $text2 = "Hello, world. Hello, big world.";

is_deeply(
    Text::Ngram::ngram_counts({punctuation => 1}, $text2, 3),
    {
        'ell' => 2,
        ' he' => 1,
        'orl' => 2,
        'hel' => 2,
        ' bi' => 1,
        'wor' => 2,
        'llo' => 2,
        ' wo' => 2,
        'big' => 1,
        'rld' => 2,
        'g w' => 1,
        'ig ' => 1,
        'lo,' => 2,
        ', b' => 1,
        '. h' => 1,
        'd. ' => 1,
        ', w' => 1,
        'ld.' => 2,
        'o, ' => 2,
    },
    "punctuation => 1 keeps punctuation inside trigrams"
);

# Trigrams expected whenever punctuation is not requested. The same result
# is asserted for several equivalent ways of spelling "default options".
my $plain_trigrams = {
    'ell' => 2,
    'lo ' => 2,
    'ld ' => 2,
    ' he' => 1,
    'orl' => 2,
    'hel' => 2,
    ' bi' => 1,
    'wor' => 2,
    'llo' => 2,
    ' wo' => 2,
    'big' => 1,
    'rld' => 2,
    'g w' => 1,
    'ig ' => 1,
};

is_deeply(
    Text::Ngram::ngram_counts($text2, 3),
    $plain_trigrams,
    "No options hash drops punctuation"
);

is_deeply(
    Text::Ngram::ngram_counts({punctuation => 0}, $text2, 3),
    $plain_trigrams,
    "punctuation => 0 matches the default"
);

# NOTE(review): this assertion repeats the previous one verbatim. It is kept
# so the planned test count stays at 15 -- confirm whether a different option
# was intended here.
is_deeply(
    Text::Ngram::ngram_counts({punctuation => 0}, $text2, 3),
    $plain_trigrams,
    "punctuation => 0 matches the default (repeated call)"
);

is_deeply(
    Text::Ngram::ngram_counts({}, $text2, 3),
    $plain_trigrams,
    "Empty options hash matches the default"
);

is_deeply(
    Text::Ngram::ngram_counts({spaces => 0}, $text2, 3),
    {
        'ell' => 2,
        'orl' => 2,
        'hel' => 2,
        'wor' => 2,
        'llo' => 2,
        'big' => 1,
        'rld' => 2,
    },
    "spaces => 0 drops every trigram containing a space"
);

# --- Other window sizes -----------------------------------------------------

is_deeply(
    Text::Ngram::ngram_counts($text2, 4),
    {
        'worl' => 2,
        ' hel' => 1,
        'orld' => 2,
        'llo ' => 2,
        ' wor' => 2,
        'ello' => 2,
        'rld ' => 2,
        ' big' => 1,
        'ig w' => 1,
        'big ' => 1,
        'g wo' => 1,
        'hell' => 2,
    },
    "Window size 4 yields four-character ngrams"
);

# --- Omitted window size and the flankbreaks option -------------------------

my $text3 = "Simple.";

is_deeply(
    Text::Ngram::ngram_counts($text3),
    {
        'simpl' => 1,
        'imple' => 1,
        'mple ' => 1,
    },
    "Omitted window size yields five-character ngrams"
);

is_deeply(
    Text::Ngram::ngram_counts({flankbreaks => 0}, $text3),
    {
        'simpl' => 1,
        'imple' => 1,
    },
    "flankbreaks => 0 drops the ngram ending in a trailing break"
);

is_deeply(
    Text::Ngram::ngram_counts({punctuation => 1, flankbreaks => 0}, $text3),
    {
        'simpl' => 1,
        'imple' => 1,
        'mple.' => 1,
    },
    "punctuation => 1 with flankbreaks => 0 keeps the final period"
);