#line 1 package HTTP::Message; use strict; use vars qw($VERSION $AUTOLOAD); $VERSION = "5.831"; require HTTP::Headers; require Carp; my $CRLF = "\015\012"; # "\r\n" is not portable $HTTP::URI_CLASS ||= $ENV{PERL_HTTP_URI_CLASS} || "URI"; eval "require $HTTP::URI_CLASS"; die $@ if $@; *_utf8_downgrade = defined(&utf8::downgrade) ? sub { utf8::downgrade($_[0], 1) or Carp::croak("HTTP::Message content must be bytes") } : sub { }; sub new { my($class, $header, $content) = @_; if (defined $header) { Carp::croak("Bad header argument") unless ref $header; if (ref($header) eq "ARRAY") { $header = HTTP::Headers->new(@$header); } else { $header = $header->clone; } } else { $header = HTTP::Headers->new; } if (defined $content) { _utf8_downgrade($content); } else { $content = ''; } bless { '_headers' => $header, '_content' => $content, }, $class; } sub parse { my($class, $str) = @_; my @hdr; while (1) { if ($str =~ s/^([^\s:]+)[ \t]*: ?(.*)\n?//) { push(@hdr, $1, $2); $hdr[-1] =~ s/\r\z//; } elsif (@hdr && $str =~ s/^([ \t].*)\n?//) { $hdr[-1] .= "\n$1"; $hdr[-1] =~ s/\r\z//; } else { $str =~ s/^\r?\n//; last; } } local $HTTP::Headers::TRANSLATE_UNDERSCORE; new($class, \@hdr, $str); } sub clone { my $self = shift; my $clone = HTTP::Message->new($self->headers, $self->content); $clone->protocol($self->protocol); $clone; } sub clear { my $self = shift; $self->{_headers}->clear; $self->content(""); delete $self->{_parts}; return; } sub protocol { shift->_elem('_protocol', @_); } sub headers { my $self = shift; # recalculation of _content might change headers, so we # need to force it now $self->_content unless exists $self->{_content}; $self->{_headers}; } sub headers_as_string { shift->headers->as_string(@_); } sub content { my $self = $_[0]; if (defined(wantarray)) { $self->_content unless exists $self->{_content}; my $old = $self->{_content}; $old = $$old if ref($old) eq "SCALAR"; &_set_content if @_ > 1; return $old; } if (@_ > 1) { &_set_content; } else { Carp::carp("Useless content call in void context") if $^W; } } sub _set_content { my $self = $_[0]; _utf8_downgrade($_[1]); if (!ref($_[1]) && ref($self->{_content}) eq "SCALAR") { ${$self->{_content}} = $_[1]; } else { die "Can't set content to be a scalar reference" if ref($_[1]) eq "SCALAR"; $self->{_content} = $_[1]; delete $self->{_content_ref}; } delete $self->{_parts} unless $_[2]; } sub add_content { my $self = shift; $self->_content unless exists $self->{_content}; my $chunkref = \$_[0]; $chunkref = $$chunkref if ref($$chunkref); # legacy _utf8_downgrade($$chunkref); my $ref = ref($self->{_content}); if (!$ref) { $self->{_content} .= $$chunkref; } elsif ($ref eq "SCALAR") { ${$self->{_content}} .= $$chunkref; } else { Carp::croak("Can't append to $ref content"); } delete $self->{_parts}; } sub add_content_utf8 { my($self, $buf) = @_; utf8::upgrade($buf); utf8::encode($buf); $self->add_content($buf); } sub content_ref { my $self = shift; $self->_content unless exists $self->{_content}; delete $self->{_parts}; my $old = \$self->{_content}; my $old_cref = $self->{_content_ref}; if (@_) { my $new = shift; Carp::croak("Setting content_ref to a non-ref") unless ref($new); delete $self->{_content}; # avoid modifying $$old $self->{_content} = $new; $self->{_content_ref}++; } $old = $$old if $old_cref; return $old; } sub content_charset { my $self = shift; if (my $charset = $self->content_type_charset) { return $charset; } # time to start guessing my $cref = $self->decoded_content(ref => 1, charset => "none"); # Unicode BOM local $_; for ($$cref) { return "UTF-8" if /^\xEF\xBB\xBF/; return "UTF-32-LE" if /^\xFF\xFE\x00\x00/; return "UTF-32-BE" if /^\x00\x00\xFE\xFF/; return "UTF-16-LE" if /^\xFF\xFE/; return "UTF-16-BE" if /^\xFE\xFF/; } if ($self->content_is_xml) { # http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing # XML entity not accompanied by external encoding information and not # in UTF-8 or UTF-16 encoding must begin with an XML encoding declaration, # in which the first characters must be ')/) { if ($1 =~ /\sencoding\s*=\s*(["'])(.*?)\1/) { my $enc = $2; $enc =~ s/^\s+//; $enc =~ s/\s+\z//; return $enc if $enc; } } } return "UTF-8"; } elsif ($self->content_is_html) { # look for or # http://dev.w3.org/html5/spec/Overview.html#determining-the-character-encoding my $charset; require HTML::Parser; my $p = HTML::Parser->new( start_h => [sub { my($tag, $attr, $self) = @_; $charset = $attr->{charset}; unless ($charset) { # look at $attr->{content} ... if (my $c = $attr->{content}) { require HTTP::Headers::Util; my @v = HTTP::Headers::Util::split_header_words($c); my($ct, undef, %ct_param) = @{$v[0]}; $charset = $ct_param{charset}; } return unless $charset; } if ($charset =~ /^utf-?16/i) { # converted document, assume UTF-8 $charset = "UTF-8"; } $self->eof; }, "tagname, attr, self"], report_tags => [qw(meta)], utf8_mode => 1, ); $p->parse($$cref); return $charset if $charset; } if ($self->content_type =~ /^text\//) { for ($$cref) { if (length) { return "US-ASCII" unless /[\x80-\xFF]/; require Encode; eval { Encode::decode_utf8($_, Encode::FB_CROAK()); }; return "UTF-8" unless $@; return "ISO-8859-1"; } } } return undef; } sub decoded_content { my($self, %opt) = @_; my $content_ref; my $content_ref_iscopy; eval { $content_ref = $self->content_ref; die "Can't decode ref content" if ref($content_ref) ne "SCALAR"; if (my $h = $self->header("Content-Encoding")) { $h =~ s/^\s+//; $h =~ s/\s+$//; for my $ce (reverse split(/\s*,\s*/, lc($h))) { next unless $ce; next if $ce eq "identity"; if ($ce eq "gzip" || $ce eq "x-gzip") { require Compress::Zlib; unless ($content_ref_iscopy) { # memGunzip is documented to destroy its buffer argument my $copy = $$content_ref; $content_ref = \$copy; $content_ref_iscopy++; } $content_ref = \Compress::Zlib::memGunzip($$content_ref); die "Can't gunzip content" unless defined $$content_ref; } elsif ($ce eq "x-bzip2") { require Compress::Bzip2; my $i = Compress::Bzip2::bzinflateInit() or die "Can't init bzip2 inflater: $Compress::Bzip2::bzerrno"; unless ($content_ref_iscopy) { # the $i->bzinflate method is documented to destroy its # buffer argument my $copy = $$content_ref; $content_ref = \$copy; $content_ref_iscopy++; } # TODO: operate on the ref when rt#48124 is fixed my ($out, $status) = $i->bzinflate($$content_ref); my $bzerr = ""; # TODO: drop $out definedness part when rt#48124 is fixed if (!defined($out) && $status != Compress::Bzip2::BZ_STREAM_END()) { if ($status == Compress::Bzip2::BZ_OK()) { $self->push_header("Client-Warning" => "Content might be truncated; incomplete bzip2 stream"); } else { # something went bad, can't trust $out any more $out = undef; # $bzerrno has more info than $i->bzerror or $status $bzerr = ": $Compress::Bzip2::bzerrno"; } } die "Can't bunzip content$bzerr" unless defined $out; $content_ref = \$out; $content_ref_iscopy++; } elsif ($ce eq "deflate") { require Compress::Zlib; my $out = Compress::Zlib::uncompress($$content_ref); unless (defined $out) { # "Content-Encoding: deflate" is supposed to mean the "zlib" # format of RFC 1950, but Microsoft got that wrong, so some # servers sends the raw compressed "deflate" data. This # tries to inflate this format. unless ($content_ref_iscopy) { # the $i->inflate method is documented to destroy its # buffer argument my $copy = $$content_ref; $content_ref = \$copy; $content_ref_iscopy++; } my($i, $status) = Compress::Zlib::inflateInit( WindowBits => -Compress::Zlib::MAX_WBITS(), ); my $OK = Compress::Zlib::Z_OK(); die "Can't init inflate object" unless $i && $status == $OK; ($out, $status) = $i->inflate($content_ref); if ($status != Compress::Zlib::Z_STREAM_END()) { if ($status == $OK) { $self->push_header("Client-Warning" => "Content might be truncated; incomplete deflate stream"); } else { # something went bad, can't trust $out any more $out = undef; } } } die "Can't inflate content" unless defined $out; $content_ref = \$out; $content_ref_iscopy++; } elsif ($ce eq "compress" || $ce eq "x-compress") { die "Can't uncompress content"; } elsif ($ce eq "base64") { # not really C-T-E, but should be harmless require MIME::Base64; $content_ref = \MIME::Base64::decode($$content_ref); $content_ref_iscopy++; } elsif ($ce eq "quoted-printable") { # not really C-T-E, but should be harmless require MIME::QuotedPrint; $content_ref = \MIME::QuotedPrint::decode($$content_ref); $content_ref_iscopy++; } else { die "Don't know how to decode Content-Encoding '$ce'"; } } } if ($self->content_is_text || $self->content_is_xml) { my $charset = lc( $opt{charset} || $self->content_type_charset || $opt{default_charset} || $self->content_charset || "ISO-8859-1" ); unless ($charset =~ /^(?:none|us-ascii|iso-8859-1)\z/) { require Encode; if (do{my $v = $Encode::VERSION; $v =~ s/_//g; $v} < 2.0901 && !$content_ref_iscopy) { # LEAVE_SRC did not work before Encode-2.0901 my $copy = $$content_ref; $content_ref = \$copy; $content_ref_iscopy++; } $content_ref = \Encode::decode($charset, $$content_ref, ($opt{charset_strict} ? Encode::FB_CROAK() : 0) | Encode::LEAVE_SRC()); die "Encode::decode() returned undef improperly" unless defined $$content_ref; } } }; if ($@) { Carp::croak($@) if $opt{raise_error}; return undef; } return $opt{ref} ? $content_ref : $$content_ref; } sub decodable { # should match the Content-Encoding values that decoded_content can deal with my $self = shift; my @enc; # XXX preferably we should determine if the modules are available without loading # them here eval { require Compress::Zlib; push(@enc, "gzip", "x-gzip", "deflate"); }; eval { require Compress::Bzip2; push(@enc, "x-bzip2"); }; # we don't care about announcing the 'identity', 'base64' and # 'quoted-printable' stuff return wantarray ? @enc : join(", ", @enc); } sub decode { my $self = shift; return 1 unless $self->header("Content-Encoding"); if (defined(my $content = $self->decoded_content(charset => "none"))) { $self->remove_header("Content-Encoding", "Content-Length", "Content-MD5"); $self->content($content); return 1; } return 0; } sub encode { my($self, @enc) = @_; Carp::croak("Can't encode multipart/* messages") if $self->content_type =~ m,^multipart/,; Carp::croak("Can't encode message/* messages") if $self->content_type =~ m,^message/,; return 1 unless @enc; # nothing to do my $content = $self->content; for my $encoding (@enc) { if ($encoding eq "identity") { # nothing to do } elsif ($encoding eq "base64") { require MIME::Base64; $content = MIME::Base64::encode($content); } elsif ($encoding eq "gzip" || $encoding eq "x-gzip") { require Compress::Zlib; $content = Compress::Zlib::memGzip($content); } elsif ($encoding eq "deflate") { require Compress::Zlib; $content = Compress::Zlib::compress($content); } elsif ($encoding eq "x-bzip2") { require Compress::Bzip2; my $d = Compress::Bzip2::bzdeflateInit() or die "Can't init bzip2 deflater: $Compress::Bzip2::bzerrno"; ($content, my $status) = $d->bzdeflate($content); die "Can't bzip content: $Compress::Bzip2::bzerrno" unless $status == Compress::Bzip2::BZ_OK(); (my $rest, $status) = $d->bzclose; die "Can't bzip content: $Compress::Bzip2::bzerrno" unless $status == Compress::Bzip2::BZ_OK(); $content .= $rest if defined $rest; } elsif ($encoding eq "rot13") { # for the fun of it $content =~ tr/A-Za-z/N-ZA-Mn-za-m/; } else { return 0; } } my $h = $self->header("Content-Encoding"); unshift(@enc, $h) if $h; $self->header("Content-Encoding", join(", ", @enc)); $self->remove_header("Content-Length", "Content-MD5"); $self->content($content); return 1; } sub as_string { my($self, $eol) = @_; $eol = "\n" unless defined $eol; # The calculation of content might update the headers # so we need to do that first. my $content = $self->content; return join("", $self->{'_headers'}->as_string($eol), $eol, $content, (@_ == 1 && length($content) && $content !~ /\n\z/) ? "\n" : "", ); } sub dump { my($self, %opt) = @_; my $content = $self->content; my $chopped = 0; if (!ref($content)) { my $maxlen = $opt{maxlength}; $maxlen = 512 unless defined($maxlen); if ($maxlen && length($content) > $maxlen * 1.1 + 3) { $chopped = length($content) - $maxlen; $content = substr($content, 0, $maxlen) . "..."; } $content =~ s/\\/\\\\/g; $content =~ s/\t/\\t/g; $content =~ s/\r/\\r/g; # no need for 3 digits in escape for these $content =~ s/([\0-\11\13-\037])(?!\d)/sprintf('\\%o',ord($1))/eg; $content =~ s/([\0-\11\13-\037\177-\377])/sprintf('\\x%02X',ord($1))/eg; $content =~ s/([^\12\040-\176])/sprintf('\\x{%X}',ord($1))/eg; # remaining whitespace $content =~ s/( +)\n/("\\40" x length($1)) . "\n"/eg; $content =~ s/(\n+)\n/("\\n" x length($1)) . "\n"/eg; $content =~ s/\n\z/\\n/; my $no_content = "(no content)"; if ($content eq $no_content) { # escape our $no_content marker $content =~ s/^(.)/sprintf('\\x%02X',ord($1))/eg; } elsif ($content eq "") { $content = "(no content)"; } } my @dump; push(@dump, $opt{preheader}) if $opt{preheader}; push(@dump, $self->{_headers}->as_string, $content); push(@dump, "(+ $chopped more bytes not shown)") if $chopped; my $dump = join("\n", @dump, ""); $dump =~ s/^/$opt{prefix}/gm if $opt{prefix}; print $dump unless defined wantarray; return $dump; } sub parts { my $self = shift; if (defined(wantarray) && (!exists $self->{_parts} || ref($self->{_content}) eq "SCALAR")) { $self->_parts; } my $old = $self->{_parts}; if (@_) { my @parts = map { ref($_) eq 'ARRAY' ? @$_ : $_ } @_; my $ct = $self->content_type || ""; if ($ct =~ m,^message/,) { Carp::croak("Only one part allowed for $ct content") if @parts > 1; } elsif ($ct !~ m,^multipart/,) { $self->remove_content_headers; $self->content_type("multipart/mixed"); } $self->{_parts} = \@parts; _stale_content($self); } return @$old if wantarray; return $old->[0]; } sub add_part { my $self = shift; if (($self->content_type || "") !~ m,^multipart/,) { my $p = HTTP::Message->new($self->remove_content_headers, $self->content("")); $self->content_type("multipart/mixed"); $self->{_parts} = []; if ($p->headers->header_field_names || $p->content ne "") { push(@{$self->{_parts}}, $p); } } elsif (!exists $self->{_parts} || ref($self->{_content}) eq "SCALAR") { $self->_parts; } push(@{$self->{_parts}}, @_); _stale_content($self); return; } sub _stale_content { my $self = shift; if (ref($self->{_content}) eq "SCALAR") { # must recalculate now $self->_content; } else { # just invalidate cache delete $self->{_content}; delete $self->{_content_ref}; } } # delegate all other method calls the the headers object. sub AUTOLOAD { my $method = substr($AUTOLOAD, rindex($AUTOLOAD, '::')+2); # We create the function here so that it will not need to be # autoloaded the next time. no strict 'refs'; *$method = sub { shift->headers->$method(@_) }; goto &$method; } sub DESTROY {} # avoid AUTOLOADing it # Private method to access members in %$self sub _elem { my $self = shift; my $elem = shift; my $old = $self->{$elem}; $self->{$elem} = $_[0] if @_; return $old; } # Create private _parts attribute from current _content sub _parts { my $self = shift; my $ct = $self->content_type; if ($ct =~ m,^multipart/,) { require HTTP::Headers::Util; my @h = HTTP::Headers::Util::split_header_words($self->header("Content-Type")); die "Assert" unless @h; my %h = @{$h[0]}; if (defined(my $b = $h{boundary})) { my $str = $self->content; $str =~ s/\r?\n--\Q$b\E--\r?\n.*//s; if ($str =~ s/(^|.*?\r?\n)--\Q$b\E\r?\n//s) { $self->{_parts} = [map HTTP::Message->parse($_), split(/\r?\n--\Q$b\E\r?\n/, $str)] } } } elsif ($ct eq "message/http") { require HTTP::Request; require HTTP::Response; my $content = $self->content; my $class = ($content =~ m,^(HTTP/.*)\n,) ? "HTTP::Response" : "HTTP::Request"; $self->{_parts} = [$class->parse($content)]; } elsif ($ct =~ m,^message/,) { $self->{_parts} = [ HTTP::Message->parse($self->content) ]; } $self->{_parts} ||= []; } # Create private _content attribute from current _parts sub _content { my $self = shift; my $ct = $self->{_headers}->header("Content-Type") || "multipart/mixed"; if ($ct =~ m,^\s*message/,i) { _set_content($self, $self->{_parts}[0]->as_string($CRLF), 1); return; } require HTTP::Headers::Util; my @v = HTTP::Headers::Util::split_header_words($ct); Carp::carp("Multiple Content-Type headers") if @v > 1; @v = @{$v[0]}; my $boundary; my $boundary_index; for (my @tmp = @v; @tmp;) { my($k, $v) = splice(@tmp, 0, 2); if ($k eq "boundary") { $boundary = $v; $boundary_index = @v - @tmp - 1; last; } } my @parts = map $_->as_string($CRLF), @{$self->{_parts}}; my $bno = 0; $boundary = _boundary() unless defined $boundary; CHECK_BOUNDARY: { for (@parts) { if (index($_, $boundary) >= 0) { # must have a better boundary $boundary = _boundary(++$bno); redo CHECK_BOUNDARY; } } } if ($boundary_index) { $v[$boundary_index] = $boundary; } else { push(@v, boundary => $boundary); } $ct = HTTP::Headers::Util::join_header_words(@v); $self->{_headers}->header("Content-Type", $ct); _set_content($self, "--$boundary$CRLF" . join("$CRLF--$boundary$CRLF", @parts) . "$CRLF--$boundary--$CRLF", 1); } sub _boundary { my $size = shift || return "xYzZY"; require MIME::Base64; my $b = MIME::Base64::encode(join("", map chr(rand(256)), 1..$size*3), ""); $b =~ s/[\W]/X/g; # ensure alnum only $b; } 1; __END__