package WebService::Solr::Query; use Any::Moose; use overload q("") => 'stringify'; my $escape_chars = quotemeta( '+-&|!(){}[]^"~*?:\\' ); has 'query' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } ); use constant D => 0; use Data::Dumper; sub BUILDARGS { my $class = shift; if ( @_ == 1 && ref $_[ 0 ] && ref $_[ 0 ] eq 'ARRAY' ) { return { query => $_[ 0 ] }; } return { query => \@_ }; } sub stringify { my $self = shift; return $self->_dispatch_struct( $self->query ); } sub _dispatch_struct { my ( $self, $struct ) = @_; my $method = '_struct_' . ref $struct; D && $self->___log( "Dispatching to ->$method " . Dumper $struct ); my $rv = $self->$method( $struct ); D && $self->___log( "Returned: $rv" ); return $rv; } sub _struct_HASH { my ( $self, $struct ) = @_; my @clauses; for my $k ( keys %$struct ) { my $v = $struct->{ $k }; D && $self->___log( "Key => $k, value => " . Dumper( $v ) ); if ( $k =~ m{^-(.+)} ) { my $method = "_op_$1"; D && $self->___log( "Dispatch ->$method " . Dumper( $v ) ); push @clauses, $self->$method( $v ); } else { D && $self->___log( "Dispatch ->_dispatch_value $k, " . Dumper( $v ) ); push @clauses, $self->_dispatch_value( $k, $v ); } } my $rv = join( ' AND ', @clauses ); D && $self->___log( "Returning: $rv" ); return $rv; } sub _struct_ARRAY { my ( $self, $struct ) = @_; my $rv = '(' . join( " OR ", map { $self->_dispatch_struct( $_ ) } @$struct ) . ')'; D && $self->___log( "Returning: $rv" ); return $rv; } sub _dispatch_value { my ( $self, $k, $v ) = @_; my $rv; ### it's an array ref, the first element MAY be an operator! ### it would look something like this: # [ '-and', # { '-require' => 'star' }, # { '-require' => 'wars' } # ]; if ( ref $v and UNIVERSAL::isa( $v, 'ARRAY' ) and defined $v->[ 0 ] and $v->[ 0 ] =~ /^ - ( AND|OR ) $/ix ) { ### XXX we're assuming that all the next statements MUST ### be hashrefs. is this correct? shift @$v; my $op = uc $1; D && $self->___log( "Special operator detected: $op " . Dumper( $v ) ); my @clauses; for my $href ( @$v ) { D && $self->___log( "Dispatch ->_dispatch_struct({ $k, " . Dumper( $href ) . '})' ); ### the individual directive ($href) pertains to the key, ### so we should send that along. my $part = $self->_dispatch_struct( { $k => $href } ); D && $self->___log( "Returned $part" ); push @clauses, '(' . $part . ')'; } $rv = '(' . join( " $op ", @clauses ) . ')'; ### nothing special about this combo, so do a usual dispatch } else { my $method = '_value_' . ( ref $v || 'SCALAR' ); D && $self->___log( "Dispatch ->$method $k, " . Dumper( $v ) ); $rv = $self->$method( $k, $v ); } D && $self->___log( "Returning: $rv" ); return $rv; } sub _value_SCALAR { my ( $self, $k, $v ) = @_; if ( ref $v ) { $v = $$v; } else { $v = '"' . $self->escape( $v ) . '"'; } my $r = qq($k:$v); $r =~ s{^:}{}; D && $self->___log( "Returning: $r" ); return $r; } sub _value_HASH { my ( $self, $k, $v ) = @_; my @clauses; for my $op ( keys %$v ) { my $struct = $v->{ $op }; $op =~ s{^-(.+)}{_op_$1}; D && $self->___log( "Dispatch ->$op $k, " . Dumper( $v ) ); push @clauses, $self->$op( $k, $struct ); } my $rv = join( ' AND ', @clauses ); D && $self->___log( "Returning: $rv" ); return $rv; } sub _value_ARRAY { my ( $self, $k, $v ) = @_; my $rv = '(' . join( ' OR ', map { $self->_value_SCALAR( $k, $_ ) } @$v ) . ')'; D && $self->___log( "Returning: $rv" ); return $rv; } sub _op_default { my ( $self, $v ) = @_; return $self->_dispatch_value( '', $v ); } sub _op_range { my ( $self, $k ) = ( shift, shift ); my @v = @{ shift() }; return "$k:[$v[ 0 ] TO $v[ 1 ]]"; } *_op_range_inc = \&_op_range; sub _op_range_exc { my ( $self, $k ) = ( shift, shift ); my @v = @{ shift() }; return "$k:{$v[ 0 ] TO $v[ 1 ]}"; } sub _op_boost { my ( $self, $k ) = ( shift, shift ); my ( $v, $boost ) = @{ shift() }; $v = $self->escape( $v ); return qq($k:"$v"^$boost); } sub _op_fuzzy { my ( $self, $k ) = ( shift, shift ); my ( $v, $distance ) = @{ shift() }; $v = $self->escape( $v ); return qq($k:$v~$distance); } sub _op_proximity { my ( $self, $k ) = ( shift, shift ); my ( $v, $distance ) = @{ shift() }; $v = $self->escape( $v ); return qq($k:"$v"~$distance); } sub _op_require { my ( $self, $k, $v ) = @_; $v = $self->escape( $v ); return qq(+$k:"$v"); } sub _op_prohibit { my ( $self, $k, $v ) = @_; $v = $self->escape( $v ); return qq(-$k:"$v"); } sub escape { my ( $self, $text ) = @_; $text =~ s{([$escape_chars])}{\\$1}g; return $text; } sub unescape { my ( $self, $text ) = @_; $text =~ s{\\([$escape_chars])}{$1}g; return $text; } sub ___log { my $self = shift; my $msg = shift; ### subroutine the log call came from, and line number the log ### call came from. that's 2 different caller frames :( my $who = join ':', [ caller( 1 ) ]->[ 3 ], [ caller( 0 ) ]->[ 2 ]; ### make sure we prefix every line with a # $msg =~ s/\n/\n#/g; print "# $who: $msg\n"; } no Any::Moose; __PACKAGE__->meta->make_immutable; 1; __END__ =head1 NAME WebService::Solr::Query - Abstract query syntax for Solr queries =head1 SYNOPSIS my $query = WebService::Solr::Query->new( { foo => 'bar' } ); my $result = $solr->search( $query ); =head1 DESCRIPTION WebService::Solr::Query provides a programmatic way to generate queries to be sent to Solr. Syntax wise, it attempts to be as close to L WHERE clauses as possible, with obvious exceptions for idioms that do not exist in SQL. Just as values in SQL::Abstract are SQL-escaped, this module does the appropriate Solr-escaping on all values passed to the object (see C). =head1 QUERY SYNTAX =head2 Key-Value Pairs The simplest way to search is with key value pairs. my $q = WebService::Solr::Query->new( { foo => 'bar' } ); # RESULT: (foo:"bar") =head2 Implicit AND and OR By default, data received as a HASHREF is AND'ed together. my $q = WebService::Solr::Query->new( { foo => 'bar', baz => 'quux' } ); # RESULT: (foo:"bar" AND baz:"quux") Furthermore, data received as an ARRAYREF is OR'ed together. my $q = WebService::Solr::Query->new( { foo => [ 'bar', 'baz' ] } ); # RESULT: (foo:"bar" OR foo:"baz") =head2 Nested AND and OR The ability to nest AND and OR boolean operators is essential to express complex queries. The C<-and> and C<-or> prefixes have been provided for this need. my $q = WebService::Solr::Query->new( { foo => [ -and => { -prohibit => 'bar' }, { -require => 'baz' } ] } ); # RESULT: (((-foo:"bar") AND (+foo:"baz"))) my $q = WebService::Solr::Query->new( { foo => [ -or => { -require => 'bar' }, { -prohibit => 'baz' } ] } ); # RESULT: (((+foo:"bar") OR (-foo:"baz"))) =head2 Default Field To search the default field, use the C<-default> prefix. my $q = WebService::Solr::Query->new( { -default => 'bar' } ); # RESULT: ("bar") =head2 Require/Prohibit my $q = WebService::Solr::Query->new( { foo => { -require => 'bar' } } ); # RESULT: (+foo:"bar") my $q = WebService::Solr::Query->new( { foo => { -prohibit => 'bar' } } ); # RESULT: (-foo:"bar") =head2 Range There are two types of range queries, inclusive (C<-range_inc>) and exclusive (C<-range_exc>). The C<-range> prefix can be used in place of C<-range_inc>. my $q = WebService::Solr::Query->new( { foo => { -range => ['a', 'z'] } } ); # RESULT: (+foo:[a TO z]) my $q = WebService::Solr::Query->new( { foo => { -range_exc => ['a', 'z'] } } ); # RESULT: (+foo:{a TO z}) =head2 Boost my $q = WebService::Solr::Query->new( { foo => { -boost => [ 'bar', '2.0' ] } } ); # RESULT: (foo:"bar"^2.0) =head2 Proximity my $q = WebService::Solr::Query->new( { foo => { -proximity => [ 'bar baz', 10 ] } } ); # RESULT: (foo:"bar baz"~10) =head2 Fuzzy my $q = WebService::Solr::Query->new( { foo => { -fuzzy => [ 'bar', '0.8' ] } } ); # RESULT: (foo:bar~0.8) =head2 Boost my $q = WebService::Solr::Query->new( { foo => { -boost => [ 'bar', '2.0' ] } } ); # RESULT: (foo:"bar"^2.0) =head2 Literal Queries Specifying a scalar ref as a value in a key-value pair will allow arbitrary queries to be sent across the line. B This will bypass any data massaging done on regular strings, thus the onus of properly escaping the data is left to the user. my $q = WebService::Solr::Query->new( { '*' => \'*' } ) # RESULT (*:*) =head1 ACCESSORS =over 4 =item * query - stores the original query structure =back =head1 METHODS =head2 new( \%query ) Creates a new query object with the given hashref. =head2 stringify( ) Converts the supplied structure into a Solr/Lucene query. =head2 escape( $value ) The following values must be escaped in a search value: + - & | ! ( ) { } [ ] ^ " ~ * ? : \ B Values sent to C are automatically escaped for you. =head2 unescape( $value ) Unescapes values escaped in C. =head2 D Debugging constant, default: off. =head2 BUILDARGS Moose method to handle input to C. =head1 SEE ALSO =over 4 =item * L =item * http://wiki.apache.org/solr/SolrQuerySyntax =back =head1 AUTHORS Brian Cassidy Ebricas@cpan.orgE Jos Boumans Ekane@cpan.orgE =head1 COPYRIGHT AND LICENSE Copyright 2008-2012 National Adult Literacy Database This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut