package Email::MIME::ContentType;
require 5.006;
use base 'Exporter';
our @EXPORT = qw(parse_content_type);
use strict;
use Carp;
use warnings;
our $VERSION = '1.0';

# Characters that must be quoted to appear in a parameter value, already
# escaped so the string can be interpolated into a character class.
my $tspecials = quotemeta '()<>@,;:\\"/[]?=';

# Content type assumed whenever the supplied header cannot be parsed.
my $ct_default = 'text/plain; charset=us-ascii';

# A complete double- or single-quoted string, honouring backslash escapes.
my $extract_quoted =
    qr/(?:\"(?:[^\\\"]*(?:\\.[^\\\"]*)*)\"|\'(?:[^\\\']*(?:\\.[^\\\']*)*)\')/;

# For documentation, really:
{
    # Type and subtype are runs of characters that are neither tspecials
    # nor whitespace, so surrounding blanks never end up in the result.
    my $discrete  = qr/[^$tspecials\s]+/;
    my $composite = qr/[^$tspecials\s]+/;
    my $params    = qr/;.*/;

    # parse_content_type($header_value)
    #
    # Parses the value of a Content-Type header (without the leading
    # "Content-Type:" field name) and returns a hash reference with the keys
    #   discrete   - lower-cased type, e.g. "text"
    #   composite  - lower-cased subtype, e.g. "plain"
    #   attributes - hash reference of parameters, keyed by lower-cased name
    #
    # An undefined or syntactically invalid value yields the parse of the
    # default, 'text/plain; charset=us-ascii'.
    sub parse_content_type {
        # XXX This does not take note of RFC2822 comments
        my $ct = shift;

        if (defined $ct
            and $ct =~ m{ ^ \s* ($discrete) \s* / \s* ($composite) \s* ($params)? $ }x)
        {
            # Copy the captures before calling anything that runs its own
            # regexes.
            my ($type, $subtype, $rest) = ($1, $2, $3);
            return {
                discrete   => lc $type,
                composite  => lc $subtype,
                attributes => _parse_attributes($rest),
            };
        }

        # It is also recommend (sic.) that this default be assumed when a
        # syntactically invalid Content-Type header field is encountered.
        return parse_content_type($ct_default);
    }
}

# _parse_attributes($params)
#
# Takes the parameter part of the header (everything from the first ';',
# possibly undef) and returns a hash reference mapping lower-cased parameter
# names to their values.  On a malformed parameter it carps and returns the
# parameters collected so far.
sub _parse_attributes {
    local $_ = shift;
    my $attribs = {};
    return $attribs unless defined $_;

    # Test the length, not the truth, of the remainder: a remainder of
    # exactly "0" is false in Perl but is still input to be parsed.
    while (length $_) {
        s/^;//;
        s/^\s+// and next;
        last unless length $_;    # a trailing ';' is not an error

        s/^([^$tspecials]+)=// or do {
            carp "Illegal Content-Type parameter $_";
            return $attribs;
        };
        my $attribute = lc $1;
        $attribute =~ s/\s+$//;   # tolerate blanks before the '='

        my $value = _extract_ct_attribute_value();
        $attribs->{$attribute} = $value;
    }
    return $attribs;
}

# _extract_ct_attribute_value()
#
# EXPECTS AND MODIFIES $_
#
# Consumes one parameter value from the front of $_ and returns it, leaving
# $_ at the following ';' or empty.  The value may be any mix of unquoted
# text and quoted strings; quotes are removed and backslash escapes inside
# quoted strings are resolved.  Returns undef when there is no value, or
# (after carping) when an unquoted special character is found.
sub _extract_ct_attribute_value {
    my $value;

    while (length $_) {
        if (s/^([^$tspecials]+)//) {
            my $token = $1;
            # Blanks between an unquoted value and the next ';' (or the end
            # of the header) are separators, not part of the value.
            $token =~ s/\s+$// if /^(?:;|\z)/;
            $value .= $token;
        }

        if (s/^($extract_quoted)//) {
            my $sub = $1;
            $sub =~ s/^["']//;
            $sub =~ s/["']$//;
            $sub =~ s/\\(.)/$1/g;    # resolve quoted-pairs: \" -> ", \\ -> \
            $value .= $sub;
        }

        /^;/ and last;

        /^([$tspecials])/ and do {
            carp "Unquoted $1 not allowed in Content-Type!";
            return;
        };
    }
    return $value;
}

1;

__END__

=head1 NAME

Email::MIME::ContentType - Parse a MIME Content-Type Header

=head1 SYNOPSIS

  use Email::MIME::ContentType;

  # Content-Type: text/plain; charset="us-ascii"; format=flowed
  my $ct = 'text/plain; charset="us-ascii"; format=flowed';
  my $data = parse_content_type($ct);

  $data = {
    discrete   => "text",
    composite  => "plain",
    attributes => {
      charset => "us-ascii",
      format  => "flowed"
    }
  }

=head1 DESCRIPTION

This module is responsible for parsing email content type headers
according to section 5.1 of RFC 2045. It returns a hash reference as
above, with entries for the discrete type, the composite type, and a
hash of attributes.

Pass only the value of the header, without the leading C<Content-Type:>
field name.

=head2 EXPORT

C<parse_content_type>

=head1 AUTHOR

Simon Cozens

=head1 SEE ALSO

RFC 2045, section 5.1 (Content-Type Header Field)

=cut