package Net::YASA; use warnings; use strict; use utf8; use Encode qw/encode decode/; use LWP::UserAgent; =head1 NAME Net::YASA - Interface to YASA (Yet Another Suffix Array) =head1 VERSION Version 0.03 =cut our $VERSION = '0.03'; our $AUTOLOAD; our %ok_field; for my $attr ( qw(content minfreq minlength ) ) { $ok_field{$attr}++; } sub AUTOLOAD { my $self = shift; my $attr = $AUTOLOAD; $attr =~ s/.*:://; return if $attr eq 'DESTROY'; if ($ok_field{$attr}) { $self->{lc $attr} = shift if @_; return $self->{lc $attr}; } else { my $superior = "SUPER::$attr"; $self->$superior(@_); } } use constant YASA_WEB_URL => 'http://yasa.newzilla.org/run/'; =head1 SYNOPSIS This module will submit content to the YASA WebService to return a list of terms and corresponding frequencies. use Net::YASA; my $foo = Net::YASA->new(); my $termset = $foo->extract(); print 'TermSet 1: ', $$termset[0], "\n"; print 'TermSet 2: ', $$termset[1], "\n"; ... =head1 EXPORT A list of functions that can be exported. You can delete this section if you don't export anything, such as for a purely object-oriented module. =head1 METHODS =head2 new =cut sub new { my $class = shift; my $self = { _ua => undef, minlength => 1, minfreq => 2, output => 'xml', _content => undef }; if(@_) { my %arg = @_; foreach (keys %arg) { $self->{lc($_)} = $arg{$_}; } } $self->{_ua} = LWP::UserAgent->new; $self->{_ua}->timeout(30); $self->{_ua}->agent('CPAN::Net::YASA'); bless($self, $class); return($self); } =head2 extract =cut sub extract { my ($self, $content) = @_; die 'No content specified' unless $content ne ""; my $ua = $self->{_ua}; my $response = $ua->post( YASA_WEB_URL.$self->{output}."/", { 'min' => $self->minlength, 'freq' => $self->minfreq, 'content' => encode("utf8",$content), } ); die "Error in extracting data from YASA!\n" unless $response->is_success(); if ($self->{output} eq "json" and eval { require JSON::Any; 1; }) { my $result = $response->content(); my $j = JSON::Any->new; my $data = $j->decode($result); return $data; } else { my $xml = decode("utf8",$response->content()); my @results = (); while ($xml =~ m#([^<]*)(\d+)#g) { push @results, $1."\t".$2; } return \@results; } } =head1 AUTHOR Cheng-Lung Sung, C<< >> =head1 BUGS Please report any bugs or feature requests to C, or through the web interface at L. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. =head1 SEE ALSO YASA (Yet Another Suffix Array) web site: L =head1 SUPPORT You can find documentation for this module with the perldoc command. perldoc Net::YASA You can also look for information at: =over 4 =item * AnnoCPAN: Annotated CPAN documentation L =item * CPAN Ratings L =item * RT: CPAN's request tracker L =item * Search CPAN L =back =head1 ACKNOWLEDGEMENTS =head1 COPYRIGHT & LICENSE Copyright 2007-2009 Cheng-Lung Sung, all rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; # End of Net::YASA