package NLP::Service;

use 5.010000;
use feature ':5.10';
use common::sense;
use Carp ();

BEGIN {
    use Exporter();
    our @ISA     = qw(Exporter);
    our $VERSION = '0.02';
    use NLP::StanfordParser;
}

use Dancer qw(:tests);    # we do not want the tests exporting the wrong functions.
use Dancer::Plugin::REST;

# Registry of parser objects keyed by model name. It is (re)built by
# load_models() and read by the route handlers below.
my %_nlp = ();

prepare_serializer_for_format;

# _parse_with_model( $model, $data )
#
# Shared implementation behind both parse routes.
#
#   $model - key into %_nlp naming the parser to use
#   $data  - raw 'data' request parameter (may be undef)
#
# Leading and trailing whitespace is stripped from $data before it is handed
# to the parser. Returns whatever the route should respond with: the result of
# send_error() when the data is empty or no parser is registered for $model,
# the evaluated parser output when that yields a defined value, and the raw
# parser output followed by a newline otherwise.
sub _parse_with_model {
    my ( $model, $data ) = @_;

    my $text = $data // q{};
    $text =~ s/\A\s+//;
    $text =~ s/\s+\z//;
    return send_error( { error => "Empty 'data' parameter" }, 500 )
      unless length $text;
    debug "Data is $text\n";

    my $parser = $_nlp{$model};
    return send_error( { error => "Invalid NLP object for $model" }, 500 )
      unless defined $parser;

    my $str = $parser->parse($text);

    # NOTE(review): this string-evals the parser's output, which is derived
    # from client-supplied text. The output format is not visible from this
    # file; if it can ever carry attacker-controlled Perl this is a code
    # execution hole and should be replaced by a real deserializer.
    my $aref = eval $str;
    Carp::carp( "Unable to eval $str" . ( $@ ? ": $@" : q{} ) ) unless $aref;
    return defined $aref ? $aref : "$str\n";
}

# Landing page: a one-line banner with the configured application name.
any [qw/get post/] => '/' => sub {

    #TODO: show a UI form based thing for easy use for the end user.
    return 'This is ' . config->{appname} . "\n";
};

# Names of the models currently registered in %_nlp.
any [qw/get post/] => '/nlp/models.:format' => sub {
    return [ keys %_nlp ];
};

# Supported languages.
any [qw/get post/] => '/nlp/languages.:format' => sub {
    return [qw/en/];
};

# Version of this service plus details of the backing NLP library.
any [qw/get post/] => '/nlp/info.:format' => sub {
    return {
        version        => $NLP::Service::VERSION,
        nlplib_name    => 'Stanford Parser',
        nlplib_source  => PARSER_SOURCE_URI,
        nlplib_release => PARSER_RELEASE_DATE,
    };
};

# Whatever NLP::StanfordParser::relations() reports.
any [qw/get post/] => '/nlp/relations.:format' => sub {
    return NLP::StanfordParser::relations();
};

# Parse with the default model (en_pcfg).
#
#Dancer::forward does not forward the parameters, hence we have to explicitly
#forward them.
any [qw/get post/] => '/nlp/parse.:format' => sub {
    my $model = 'en_pcfg';
    my $route = "/nlp/parse/$model." . params->{format};
    debug "Forwarding to $route";

    if ( request->method eq 'GET' ) {
        return forward $route,
          {
            format => params->{format},
            model  => $model,
            data   => params->{data},
          };
    }

    # HACK inserted until Dancer's forwarding bug gets fixed.
    # https://github.com/sukria/Dancer/pull/545
    # Non-GET requests are served here directly instead of being forwarded.
    return _parse_with_model( $model, params->{data} );
};

# Parse with an explicitly named model.
any [qw/get post/] => '/nlp/parse/:model.:format' => sub {
    my $model = params->{model};
    debug "Model is $model";
    return send_error( { error => "Unknown parsing model $model" }, 500 )
      unless defined $_nlp{$model};

    return _parse_with_model( $model, params->{data} );
};

# load_models( $force )
#
# Rebuilds %_nlp from scratch with one NLP::StanfordParser object per
# supported model. When $force is true, each object's parser() method is
# called right after construction so the model is loaded immediately.
# Any further arguments are accepted and ignored.
#
# Returns nothing in void context, the registry as a key/value list in list
# context, and the number of registered models in scalar context.
# Croaks if any parser object cannot be created.
sub load_models {
    my ($force) = @_;
    say 'Forcing loading of all NLP models.' if $force;

    # [ registry key, model constant, label used in the error message ]
    my @specs = (

        # PCFG load times are reasonable ~ 5 sec.
        [ 'en_pcfg', MODEL_EN_PCFG(), 'MODEL_EN_PCFG' ],

        # Factored load times can be quite slow ~ 30 sec.
        [ 'en_factored', MODEL_EN_FACTORED(), 'MODEL_EN_FACTORED' ],

        # PCFG WSJ takes ~ 2-3 seconds to load
        [ 'en_pcfgwsj', MODEL_EN_PCFG_WSJ(), 'MODEL_EN_PCFG_WSJ' ],

        # FACTORED WSJ takes ~ 20 seconds to load
        [ 'en_factoredwsj', MODEL_EN_FACTORED_WSJ(), 'MODEL_EN_FACTORED_WSJ' ],
    );

    %_nlp = ();
    for my $spec (@specs) {
        my ( $name, $model, $label ) = @{$spec};
        $_nlp{$name} = NLP::StanfordParser->new( model => $model )
          or Carp::croak "Unable to create $label for NLP::StanfordParser";

        # We force load on startup when asked to.
        $_nlp{$name}->parser if $force;
    }

    return unless defined wantarray;    # void context returns nothing
    return wantarray ? %_nlp : scalar( keys(%_nlp) );
}

# run( force => $bool, config => \%settings )
#
# Applies the Dancer settings, loads the models and starts the web service.
#
#   force  - passed straight to load_models()
#   config - hash reference of Dancer settings applied with set(); when it is
#            absent or not a hash reference a built-in set of defaults is
#            applied instead
sub run {
    my %args = @_;

    # Plain lookups: the "my $x = ... if COND" form has undefined behaviour.
    my $force  = $args{force};
    my $config = $args{config};

    if ( defined $config and ref $config eq 'HASH' ) {
        set( $_ => $config->{$_} ) for keys %{$config};
    }
    else {
        set log          => 'error';
        set logger       => 'console';
        set show_errors  => 1;
        set startup_info => 0;
    }

    load_models($force);
    dance;    # invoke Dancer
}

1;
__END__
COPYRIGHT: 2011. Vikas Naresh Kumar.
AUTHOR: Vikas Naresh Kumar
DATE: 25th March 2011
This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself.

=head1 NAME

NLP::Service

=head1 SYNOPSIS

NLP::Service is a RESTful web service based off Dancer to provide natural language parsing for English.

=head1 VERSION

0.02

=head1 METHODS

=over

=item B<run()>

The C<run()> function starts up the NLP::Service, and listens to requests. It makes sure that the NLP Engines that are being used are loaded up before the web service is ready. It takes a hash as an argument with the following keys:

=over 8

=item B<force>

Forces the loading of all NLP models before doing anything. The value expected is anything that is not 0 or undef, to be able to do this. Example, C<run(force =E<gt> 1);>

=item B<config>

Takes in a configuration for the internal service implementation. Currently the implementation is using Dancer, and all of these keys correspond to Dancer::Config. For more details, refer to Dancer config for the acceptable values. Example, C<run(config =E<gt> { logger =E<gt> 'console' });>

=back

=item B<load_models()>

The C<load_models()> function creates all the required NLP models that are supported. This is internally called by the C<run()> function, so the user does not explicitly need to call them. It is useful however, for explicit loading of the models, if the models need to be used in unit tests or elsewhere.
In void context it returns nothing, but in scalar context returns the number of models that were loaded, and in list context returns a hash with the keys being model names and the values being the actual references to the perl objects that represent the models. This is rarely necessary for the user to be using. It takes a single argument which is a boolean to forcibly load the parsers or not. By default the lazy load option is assumed unless explicitly set by the user. For example, C<load_models(1)> for forced loading and C<load_models(0)> for lazy loading.

=back

=head1 RESTful API

Multiple formats are supported in the API. Most notably they are XML, YAML and JSON. The URIs need to end with C<.xml>, C<.yml> and C<.json> for XML, YAML and JSON, respectively.

=over

=item B<GET/POST> I</nlp/models.(json|xml|yml)>

Returns an array of loaded models. These are the model names that will be used in the other RESTful API URI strings.

=item B<GET/POST> I</nlp/languages.(json|xml|yml)>

Returns an array of supported languages. Default is "en" for English.

=item B<GET/POST> I</nlp/info.(json|xml|yml)>

Returns a hashref of details about the NLP tool being used.

=item B<GET/POST> I</nlp/relations.(json|xml|yml)>

The user can get a list of all the English grammatical relations supported by the NLP backend.

=item B<GET/POST> I</nlp/parse/$model.(json|xml|yml)>

The user can make GET or POST requests to the above URI constructed by the user or their programs. The C<$model> corresponds to one of the available models such as "en_pcfg", "en_factored", etc. The list of supported models is returned by the GET request to the C</nlp/models.(json|xml|yml)> URI. The return value is a Part of Speech tagged variation of the input parameter I<data>. The parameters needed are as follows:

=over 2

=item B<data>

One of the parameters expected is I<data> which should contain the text that needs to be parsed and whose NLP formation of Part-of-Speech tagging needs to be returned.

=back

=item B<GET/POST> I</nlp/parse.(json|xml|yml)>

This performs the same function as above, but picks the default model which is C<en_pcfg>. It expects the same parameters as above.

=back

=head1 COPYRIGHT

Copyright (C) 2011.
B<Vikas Naresh Kumar>

This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself.

Started on 25th March 2011.