package HTTP::Rollup; require 5.005; use strict; use CGI::Util qw( unescape ); use Exporter; use vars qw($VERSION @ISA @EXPORT_OK); @ISA = qw(Exporter); @EXPORT_OK = qw(RollupQueryString); $VERSION = '0.8'; my $DEFAULT_DELIMITER = "&"; # Turn on special checking for Doug MacEachern's modperl my $MOD_PERL = 0; if (exists $ENV{MOD_PERL}) { if ($ENV{MOD_PERL_API_VERSION} == 2) { $MOD_PERL = 2; require Apache2::RequestUtil; require APR::Table; } else { $MOD_PERL = 1; require Apache; } } =head1 NAME HTTP::Rollup - translate an HTTP query string to a hierarchical structure =head1 SYNOPSIS use HTTP::Rollup qw(RollupQueryString); my $rollup = new HTTP::Rollup; my $hashref = $rollup->RollupQueryString($query_string); =head1 DESCRIPTION Given input text of the format: employee.name.first=Jane employee.name.last=Smith employee.address=123%20Main%20St. employee.city=New%20York id=444 phone=(212)123-4567 phone=(212)555-1212 @fax=(212)999-8877 Construct an output data structure like this: $hashref = { employee => { name => { "first" => "Jane", "last" => "Smith", }, address => "123 Main St.", city => "New York" }, phone => [ "(212)123-4567", "(212)555-1212" ], fax => [ "(212)999-8877" ], id => 444 }; This is intended as a drop-in replacement for the HTTP query string parsing implemented in CGI.pm, adding the ability to assemble a nested data structure (CGI.pm constructs purely flat structures). e.g. given the sample input above, CGI.pm would produce: $hashref = { "employee.name.first" => [ "Jason" ], "employee.name.last" => [ "Smith" ], "employee.name.address" => [ "123 Main St." ], "employee.name.city" => [ "New York" ], "phone" => [ "(212)123-4567", "(212)555-1212" ], "@fax"=> [ "(212)999-8877" ], "id" => [ 444 ] }; If no $query_string parameter is provided, HTTP::Rollup will attempt to find the input in the same manner used by CGI.pm (the internal _query_string function is pretty much cloned from CGI.pm). HTTP::Rollup runs under both CGI or mod_perl contexts, and from the command line (reads from @ARGV or stdin). =head1 FEATURES =over =item * Data nesting using dot notation =item * Recognizes a list if there is more than one value with the same name =item * Lists can be forced with a leading @-sign, to allow for lists that could have just one element (eliminating ambiguity between scalar and single- element list). The @ will be stripped. =back =head1 FUNCTIONS =item new([ FORCE_LIST => 1 ], [ DELIM => ";" ]) The FORCE_LIST switch causes CGI.pm-style behavior, as above, for backward compatibility. The DELIM option specifies the input field delimiter. This is not auto-detected. Default is the standard ampersand, though semicolon has been proposed as a replacement to avoid conflict with the ampersand used for character entities. Specifying "\n" for the delimiter is helpful for parsing parameters on stdin. =item RollupQueryString() Workhorse function. =begin testing use lib "./blib/lib"; use HTTP::Rollup qw(RollupQueryString); use Data::Dumper; my $s1 = "one=abc&two=def&three=ghi"; my $r1 = new HTTP::Rollup; my $hr = $r1->RollupQueryString($s1); # default delimiter ok ($hr->{one} eq "abc"); ok ($hr->{two} eq "def"); ok ($hr->{three} eq "ghi"); my $string = <<_END_; employee.name.first=Jane employee.name.last=Smith employee.address=123%20Main%20St. employee.city=New%20York id=444 phone=(212)123-4567 phone=(212)555-1212 \@fax=(212)999-8877 _END_ my $r2 = new HTTP::Rollup(DELIM => "\n"); my $hashref = $r2->RollupQueryString($string); ok($hashref->{employee}->{name}->{first} eq "Jane", "2-nested scalar"); ok($hashref->{employee}->{city} eq "New York", "1-nested scalar, with unescape"); ok($hashref->{id} eq "444", "top-level scalar"); ok($hashref->{phone}->[1] eq "(212)555-1212", "auto-list"); ok($hashref->{fax}->[0] eq "(212)999-8877", "\@-list"); my $string2 = "employee.name.first=Jane;employee.name.last=Smith;employee.address=123%20Main%20St.;employee.city=New%York;id=444;phone=(212)123-4567;phone=(212)555-1212;\@fax=(212)999-8877"; my $r3 = new HTTP::Rollup(DELIM => ";"); $hashref = $r3->RollupQueryString($string2); ok($hashref->{employee}->{name}->{first} eq "Jane", "nested scalar"); ok($hashref->{id} eq "444", "top-level scalar"); ok($hashref->{phone}->[1] eq "(212)555-1212", "auto-list"); ok($hashref->{fax}->[0] eq "(212)999-8877", "\@-list"); my $r4 = new HTTP::Rollup(FORCE_LIST => 1, DELIM => "\n"); my $hashref2 = $r4->RollupQueryString($string); ok($hashref2->{'employee.name.first'}->[0] eq "Jane", "nested scalar"); ok($hashref2->{id}->[0] eq "444", "top-level scalar"); ok($hashref2->{phone}->[1] eq "(212)555-1212", "auto-list"); ok($hashref2->{'@fax'}->[0] eq "(212)999-8877", "\@-list"); =end testing =cut my %legal_parameters = ( FORCE_LIST => 1, DELIM => 1, ); sub new { my $cl = shift; my $class = ref($cl) || $cl; my %params = @_; my $self = {}; bless $self, $class; for my $param (keys %params) { if ($legal_parameters{$param}) { $self->{$param} = $params{$param}; } else { print STDERR __PACKAGE__, ": illegal config parameter $param\n"; } } return $self; } sub RollupQueryString { my $self = shift; my $input = shift; my $delimiter = $self->{DELIM} || $DEFAULT_DELIMITER; if (!defined $input) { $input = _query_string(); } my $root = {}; return $root if !$input; # query strings are name-value pairs delimited by & or by newline or semicolon foreach my $nvp (split(/$delimiter/, $input)) { last if $nvp eq "="; # sometimes appears as query string terminator PARSE: my ($name, $value) = split /=/, $nvp; my @levels = split /\./, $name; $value = CGI::Util::unescape($value); if ($self->{FORCE_LIST}) { # always use a list, for CGI.pm-style behavior if (ref $root->{$name}) { # there's already a list there push @{$root->{$name}}, $value; } else { $root->{$name} = [ $value ]; } next; } TRAVERSE: my $node = $root; my $leaf; for ($leaf = shift @levels; scalar(@levels) >= 1; $leaf = shift @levels) { $node->{$leaf} = {} unless defined $node->{$leaf}; # vivify $node = $node->{$leaf}; } SAVE: if (ref $node->{$leaf}) { # there's already a list there $leaf =~ s/^@//; push @{$node->{$leaf}}, $value; } elsif (defined $node->{$leaf}) { # scalar now, convert to a list $node->{$leaf} = [ $node->{$leaf}, $value ]; } elsif ($leaf =~ /^\@/) { # leading @ forces list $leaf =~ s/^@//; $node->{$leaf} = [ $value ]; } else { $node->{$leaf} = $value; } } return $root; } # Most of the following was copied from CGI.pm (some version <2.8). # Frozen here to avoid breakage on CGI changes, and to allow local # alterations (e.g. support for PUT). sub _query_string { my $meth = $ENV{'REQUEST_METHOD'}; my $query_string; if (!defined $meth) { # no REQUEST_METHOD, so must be command-line usage return _read_from_cmdline(); } if ($meth =~ /^(GET|HEAD)$/o) { if ($MOD_PERL == 1) { return Apache->request->args; } elsif ($MOD_PERL ==2) { return Apache2::RequestUtil->request->args; } else { # CGI mode, not mod_perl return $ENV{QUERY_STRING} || $ENV{REDIRECT_QUERY_STRING}; } } # this is a POST my $content_length = $ENV{CONTENT_LENGTH} || 0; _read_from_client(\*STDIN, \$query_string, $content_length, 0) if $content_length > 0; # Have our cake and eat it too! (see CGI.pm) # Append query string contents to the POST data. if ($ENV{QUERY_STRING}) { $query_string .= (length($query_string) ? '&' : '') . $ENV{QUERY_STRING}; } return $query_string; } sub _read_from_client { my($fh, $buff, $len, $offset) = @_; local $^W=0; # prevent a warning return undef unless defined($fh); return read($fh, $$buff, $len, $offset); } # Note: multiple parameters on cmdline are always linked with ampersand; # so better not change DELIM for this input style. sub _read_from_cmdline { my($input,@words); my($query_string); if (@ARGV) { @words = @ARGV; } else { my @lines; chomp(@lines = ); # remove newlines $input = join(" ",@lines); @words = _shellwords($input); } foreach (@words) { s/\\=/%3D/g; s/\\&/%26/g; } if ("@words"=~/=/) { $query_string = join('&',@words); } else { $query_string = join('+',@words); } return $query_string; } # Taken from shellwords.pl in the Perl 5.6 distribution. # # Usage: # @words = &shellwords($line); # or # @words = &shellwords(@lines); # or # @words = &shellwords; # defaults to $_ (and clobbers it) sub _shellwords { local ($_) = join('', @_) if @_; my (@words, $snippet, $field); s/^\s+//; if ($_ ne '') { $field = ''; for (;;) { if (s/^"(([^"\\]|\\.)*)"//) { ($snippet = $1) =~ s#\\(.)#$1#g; } elsif (/^"/) { die "Unmatched double quote: $_\n"; } elsif (s/^'(([^'\\]|\\.)*)'//) { ($snippet = $1) =~ s#\\(.)#$1#g; } elsif (/^'/) { die "Unmatched single quote: $_\n"; } elsif (s/^\\(.)//) { $snippet = $1; } elsif (s/^([^\s\\'"]+)//) { $snippet = $1; } else { s/^\s+//; last; } $field .= $snippet; } push(@words, $field); } @words; } 1; =head1 AUTHOR Jason W. May =head1 COPYRIGHT Copyright (C) 2002-2005 Jason W. May. All rights reserved. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut