The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl
use strict;
use warnings;
use Test::More;
use Data::Dump qw( dump );
use File::Temp qw( tempdir );
my $invindex = tempdir( CLEANUP => 1 );

use Lucy;

use_ok('LucyX::Search::AnyTermQuery');
use_ok('LucyX::Search::NullTermQuery');

my $case_folder = Lucy::Analysis::CaseFolder->new;
my $tokenizer   = Lucy::Analysis::RegexTokenizer->new;
my $analyzer    = Lucy::Analysis::PolyAnalyzer->new(
    analyzers => [ $case_folder, $tokenizer, ], );

my $schema   = Lucy::Plan::Schema->new;
my $fulltext = Lucy::Plan::FullTextType->new(
    analyzer => $analyzer,
    sortable => 1,
);
my $fulltext_nosort = Lucy::Plan::FullTextType->new(
    analyzer => $analyzer,
    sortable => 0,
);
$schema->spec_field( name => 'uri',       type => $fulltext );
$schema->spec_field( name => 'nullfield', type => $fulltext );
$schema->spec_field( name => 'title',     type => $fulltext );
$schema->spec_field( name => 'color',     type => $fulltext_nosort );
$schema->spec_field( name => 'date',      type => $fulltext );
$schema->spec_field( name => 'option',    type => $fulltext_nosort );

my $indexer = Lucy::Index::Indexer->new(
    index    => $invindex,
    schema   => $schema,
    create   => 1,
    truncate => 1,
);

my %docs = (
    'doc1' => {
        title  => 'Acute-Phase Reaction',
        color  => 'red blue orange',
        date   => '20100329',
        option => 'a',
    },
    'doc2' => {
        title  => 'Leukemia, Biphenotypic, Acute',
        color  => 'green yellow purple',
        date   => '20100301',
        option => 'b',
    },
    'doc3' => {
        title  => 'Leukemia, Megakaryoblastic, Acute',
        color  => 'brown black white',
        date   => '19720329',
        option => '',
    },
    'doc4' => {
        title  => 'Porphyria, Acute Intermittent',
        color  => '',
        date   => '20100510',
        option => 'c',
    },
    'doc5' => {
        title  => '',
        color  => 'white',
        date   => '20100510',
        option => 'e',
    },
    'doc6' => {
        title  => 'Acute Kidney Injury',
        color  => 'teal',
        date   => '19000101',
        option => 'd',
    },
);

# set up the index
for my $doc ( keys %docs ) {
    my $doc_ref = { %{ $docs{$doc} }, uri => $doc };

    #diag( dump $doc_ref );
    $indexer->add_doc($doc_ref);
}

$indexer->commit;

my $searcher = Lucy::Search::IndexSearcher->new( index => $invindex, );

# search
my %queries = (
    'color:NULL'     => 1,
    'color!:NULL'    => 5,
    'option:NULL'    => 1,
    'title!:NULL'    => 5,    # this and next should parse identically
    'NOT title:NULL' => 5,
    'nullfield:NULL' => 6,
);

for my $str ( sort keys %queries ) {
    my $query = make_query($str);

    my $hits_expected = $queries{$str};
    if ( ref $hits_expected ) {
        $query->debug(1);
        $hits_expected = $hits_expected->[0];
    }

    #diag($query);
    my $hits = $searcher->hits(
        query      => $query,
        offset     => 0,
        num_wanted => 10,       # more than we have
    );

    is( $hits->total_hits, $hits_expected, "$str == $hits_expected" );

    if ( $hits->total_hits != $hits_expected ) {
        diag( dump( $query->dump ) );

        my $count = 0;
        while ( my $hit = $hits->next ) {
            $count++;
            diag( " [$count] hit: " . $hit->{uri} );
        }
    }

}

# allow for adding new queries without adjusting test count
done_testing( scalar( keys %queries ) + 2 );

sub make_query {
    my $str = shift;

    my ( $field, $op ) = ( $str =~ m/(\w+)(!?:)NULL/ );

    #diag("field == \'$field\'  op=$op");
    my $query;
    if ( $op eq '!:' or $str =~ m/^NOT/ ) {
        $query = LucyX::Search::AnyTermQuery->new( field => $field, );
    }
    else {
        $query = LucyX::Search::NullTermQuery->new( field => $field, );
    }
    return $query;
}