package CGI::BasePlus; require 5.001; use CGI::Base; use URI::Escape qw(uri_escape uri_unescape); use CGI::Carp; @ISA = qw(CGI::Base); $revision='$Id: BasePlus.pm,v 2.76 1997/4/5 08:20:00 lstein Exp $'; ($VERSION=$revision)=~s/.*(\d+\.\d+).*/$1/; =head1 NAME CGI::BasePlus - HTTP CGI Base Class with Handling of Multipart Forms =head1 DESCRIPTION This module implements a CGI::BasePlus object that is identical in behavior to CGI::Base except that it provides special handling for postings of MIME type multipart/form-data (which may get very long). In the case of these types of postings, parts that are described as being from a file upload are copied into a temporary file in /usr/tmp, a filehandle is opened on the temporary files, and the name of the filehandle is returned to the caller in the $CGI::Base:QUERY_STRING variable. Please see L for more information. =head2 SEE ALSO URI::URL, CGI::Request, CGI::MiniSvr, CGI::Base =cut ; ############ SUPPORT ROUTINES FOR THE NEW MULTIPART ENCODING ########## package MultipartBuffer; # how many bytes to read at a time. We use # a 5K buffer by default. $FILLUNIT = 1024 * 5; $TIMEOUT = 10*60; # 10 minute timeout $SPIN_LOOP_MAX = 1000; # bug fix for some Netscape servers $CRLF="\015\012"; sub new { my($package,$boundary,$length,$filehandle) = @_; my $IN; if ($filehandle) { my($package) = caller; # force into caller's package if necessary $IN = $filehandle=~/[':]/ ? $filehandle : "$package\:\:$filehandle"; } $IN = "main::STDIN" unless $IN; binmode($IN); # Netscape seems to be a little bit unreliable # about providing boundary strings. if ($boundary) { # Under the MIME spec, the boundary consists of the # characters "--" PLUS the Boundary string $boundary = "--$boundary"; # Read the topmost (boundary) line plus the CRLF my($null) = ''; $length -= read($IN,$null,length($boundary)+2,0); } else { # otherwise we find it ourselves my($old); ($old,$/) = ($/,$CRLF); # read a CRLF-delimited line $boundary = <$IN>; # BUG: This won't work correctly under mod_perl $length -= length($boundary); chomp($boundary); # remove the CRLF $/ = $old; # restore old line separator } my $self = {LENGTH=>$length, BOUNDARY=>$boundary, IN=>$IN, BUFFER=>'', }; $FILLUNIT = length($boundary) if length($boundary) > $FILLUNIT; return bless $self,$package; } # This reads and returns the header as an associative array. # It looks for the pattern CRLF/CRLF to terminate the header. sub readHeader { my($self) = @_; my($end); my($ok) = 0; do { $self->fillBuffer($FILLUNIT); $ok++ if ($end = index($self->{BUFFER},"${CRLF}${CRLF}")) >= 0; $ok++ if $self->{BUFFER} eq ''; $FILLUNIT *= 2 if length($self->{BUFFER}) >= $FILLUNIT; } until $ok; my($header) = substr($self->{BUFFER},0,$end+2); substr($self->{BUFFER},0,$end+4) = ''; my %return; while ($header=~/^([\w-]+): (.*)$CRLF/mog) { $return{$1}=$2; } return %return; } # This reads and returns the body as a single scalar value. sub readBody { my($self) = @_; my($data); my($returnval)=''; while (defined($data = $self->read)) { $returnval .= $data; } return $returnval; } # This will read $bytes or until the boundary is hit, whichever happens # first. After the boundary is hit, we return undef. The next read will # skip over the boundary and begin reading again; sub read { my($self,$bytes) = @_; # default number of bytes to read $bytes = $bytes || $FILLUNIT; # Fill up our internal buffer in such a way that the boundary # is never split between reads. $self->fillBuffer($bytes); # Find the boundary in the buffer (it may not be there). my $start = index($self->{BUFFER},$self->{BOUNDARY}); # If the boundary begins the data, then skip past it # and return undef. The +2 here is a fiendish plot to # remove the CR/LF pair at the end of the boundary. if ($start == 0) { # clear us out completely if we've hit the last boundary. if (index($self->{BUFFER},"$self->{BOUNDARY}--")==0) { $self->{BUFFER}=''; $self->{LENGTH}=0; return undef; } # just remove the boundary. substr($self->{BUFFER},0,length($self->{BOUNDARY})+2)=''; return undef; } my $bytesToReturn; if ($start > 0) { # read up to the boundary $bytesToReturn = $start > $bytes ? $bytes : $start; } else { # read the requested number of bytes # leave enough bytes in the buffer to allow us to read # the boundary. Thanks to Kevin Hendrick for finding # this one. $bytesToReturn = $bytes - (length($self->{BOUNDARY})+1); } my $returnval=substr($self->{BUFFER},0,$bytesToReturn); substr($self->{BUFFER},0,$bytesToReturn)=''; # If we hit the boundary, remove the CRLF from the end. return ($start > 0) ? substr($returnval,0,-2) : $returnval; } # This fills up our internal buffer in such a way that the # boundary is never split between reads sub fillBuffer { my($self,$bytes) = @_; return unless $self->{LENGTH}; my($boundaryLength) = length($self->{BOUNDARY}); my($bufferLength) = length($self->{BUFFER}); my($bytesToRead) = $bytes - $bufferLength + $boundaryLength + 2; $bytesToRead = $self->{LENGTH} if $self->{LENGTH} < $bytesToRead; # Try to read some data. We may hang here if the browser is screwed up. my $bytesRead = read($self->{IN},$self->{BUFFER},$bytesToRead,$bufferLength); # An apparent bug in the Netscape Commerce server causes the read() # to return zero bytes repeatedly without blocking if the # remote user aborts during a file transfer. I don't know how # they manage this, but the workaround is to abort if we get # more than SPIN_LOOP_MAX consecutive zero reads. if ($bytesRead == 0) { die "CGI::BasePlus: Server closed socket during multipart read (client aborted?).\n" if ($self->{ZERO_LOOP_COUNTER}++ >= $SPIN_LOOP_MAX); } else { $self->{ZERO_LOOP_COUNTER}=0; } $self->{LENGTH} -= $bytesRead; } # Return true when we've finished reading sub eof { my($self) = @_; return 1 if (length($self->{BUFFER}) == 0) && ($self->{LENGTH} <= 0); } package TempFile; @TEMP=('/usr/tmp','/var/tmp','/tmp',); unshift(@TEMP,$ENV{TMPDIR}) if defined($ENV{TMPDIR}); foreach (@TEMP) { do {$TMPDIRECTORY = $_; last} if -w $_; } $TMPDIRECTORY = "." unless $TMPDIRECTORY; $SEQUENCE="CGItemp${$}0000"; # cute feature, but no longer supported # %OVERLOAD = ('""'=>'as_string'); # Create a temporary file that will be automatically # unlinked when finished. sub new { my($package) = @_; $SEQUENCE++; my $directory = "${TMPDIRECTORY}/${SEQUENCE}"; return bless \$directory; } sub DESTROY { my($self) = @_; unlink $$self; # get rid of the file } sub as_string { my($self) = @_; return $$self; } ############ OVERRIDDEN ROUTINES IN CGI::Base ########## package CGI::BasePlus; # Read entity body in such a way that file uploads are stored # to temporary disk files. See below. sub read_post_body { my $self = shift; # Use parent's read_post_body() method unless we have a # new multipart/form-data type of body to deal with. return &CGI::Base::read_post_body($self) unless $CGI::Base::CONTENT_TYPE =~ m|^multipart/form-data|; # Handle multipart/form-data postings. For compatability # with the Request.pm module, the name/value pairs are # converted into canonical (URL-encoded) form and stored # into $CGI::Base::QUERY_STRING. my($boundary) = $ENV{'CONTENT_TYPE'}=~/boundary=(\S+)/; $self->read_multipart($boundary,$ENV{'CONTENT_LENGTH'}); } sub read_multipart { my($self,$boundary,$length) = @_; my($buffer) = new MultipartBuffer($boundary,$length); my(%header,$body); while (!$buffer->eof) { %header = $buffer->readHeader; # In beta1 it was "Content-disposition". In beta2 it's "Content-Disposition" # Sheesh. my($key) = $header{'Content-disposition'} ? 'Content-disposition' : 'Content-Disposition'; my($param) = $header{$key}=~/ name="(.*?)"/; my($filename) = $header{$key}=~/ filename="(.*?)"/; my($value); if ($filename) { # If we get here, then we are dealing with a potentially large # uploaded file. Save the data to a temporary file, then open # the file for reading, and stash the filehandle name inside # the query string. my($tmpfile) = new TempFile; my $tmp = $tmpfile->as_string; open (OUT,">$tmp") || croak "CGI open of $tmpfile: $!\n"; chmod 0666,$tmp; # make sure anyone can delete it. binmode(OUT); my $data; while ($data = $buffer->read) { print OUT $data; } close OUT; # Now create a new filehandle in the caller's namespace. # The name of this filehandle just happens to be identical # to the original filename (NOT the name of the temporary # file, which is hidden!) my($filehandle); if ($filename=~/^[a-zA-Z_]/) { my($frame,$cp) = (1); do { $cp = caller($frame++); } until $cp!~/^CGI/; $filehandle = "$cp\:\:$filename"; } else { $filehandle = "\:\:$filename"; } warn "Filehandle = $filehandle tmpfile = $tmp"; open($filehandle,$tmp) || croak "CGI open of $tmpfile: $!\n"; binmode($filehandle); $value = $filename; # Under Unix, it is safe to let the temporary file be deleted # when it goes out of scope. The storage is not deallocated # until the last file descriptor is closed. So we do nothing # special here. } # If we get here then we're dealing a non-file form field, which we # will assume can fit into memory OK. else { $value = $buffer->readBody; } # Now we store the parameter name and the value into our # query string for later retrieval $CGI::Base::QUERY_STRING .= '&' if $CGI::Base::QUERY_STRING; $CGI::Base::QUERY_STRING .= uri_escape($param) . '=' . uri_escape($value); } 1; } $VERSION; # prevent spurious warning message