The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
##---------------------------------------------------------------------------##
##  File:
##	$Id: mhutil.pl,v 2.34 2011/01/02 08:42:32 ehood Exp $
##  Author:
##      Earl Hood       mhonarc@mhonarc.org
##  Description:
##      Utility routines for MHonArc
##---------------------------------------------------------------------------##
##    MHonArc -- Internet mail-to-HTML converter
##    Copyright (C) 1995-1999	Earl Hood, mhonarc@mhonarc.org
##
##    This program is free software; you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation; either version 2 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with this program; if not, write to the Free Software
##    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
##    02111-1307, USA
##---------------------------------------------------------------------------##

package mhonarc;

use MHonArc::RFC822;

## RFC 2369 header fields to check for URLs
%HFieldsList = (
    'list-archive'  	=> 1,
    'list-help'  	=> 1,
    'list-owner'  	=> 1,
    'list-post'  	=> 1,
    'list-subscribe'  	=> 1,
    'list-unsubscribe' 	=> 1,
);

## Do not apply ADDRESSMODIFYCODE headerfields
%HFieldsAsIsList = (
    %HFieldsList,
    'content-disposition' => 1,
    'content-id'          => 1,
    'content-type'        => 1,
    'message-id'          => 1,
    'references'          => 1,
    'in-reply-to'         => 1,
);

## Header fields that contain addresses
%HFieldsAddr = (
    'apparently-from'	=> 1,
    'apparently-to'	=> 1,
    'bcc'		=> 1,
    'cc'		=> 1,
    'dcc'		=> 1,
    'from'		=> 1,
    'mail-followup-to'	=> 1,
    'mail-reply-to'	=> 1,
    'notify-bcc'	=> 1,
    'notify-cc' 	=> 1,
    'notify-to' 	=> 1,
    'original-bcc'	=> 1,
    'original-cc'	=> 1,
    'original-from'	=> 1,
    'original-sender'	=> 1,
    'original-to'	=> 1,
    'reply-to'		=> 1,
    'resent-bcc'	=> 1,
    'resent-cc'		=> 1,
    'resent-from'	=> 1,
    'resent-sender'	=> 1,
    'resent-to'		=> 1,
    'return-path'	=> 1,
    'sender'		=> 1,
    'to'		=> 1,
    'x-envelope'	=> 1,
);

##---------------------------------------------------------------------------
##    Convert message header string to HTML encoded in
##    $readmail::TextEncode encoding.
##
sub htmlize_enc_head {
    my($cnvfunc, $charset) =
	readmail::MAILload_charset_converter($readmail::TextEncode);
    return htmlize($_[0])
	if ($cnvfunc eq '-decode-' || $cnvfunc eq '-ignore-');
    return &$cnvfunc($_[0], $charset);
}

##---------------------------------------------------------------------------
##    Clip text to specified length.
##
sub clip_text {
    my $str      = \shift;  # Prevent unnecessary copy.
    my $len      = shift;   # Clip length
    my $is_html  = shift;   # If entity references should be considered
    my $has_tags = shift;   # If html tags should be stripped

    if (!$is_html) {
      return substr($$str, 0, $len);
    }

    my $text = "";
    my $subtext = "";
    my $html_len = length($$str);
    my($pos, $sublen, $real_len, $semi);
    my $er_len = 0;
    
    for ( $pos=0, $sublen=$len; $pos < $html_len; ) {
	$subtext = substr($$str, $pos, $sublen);
	$pos += $sublen;

	# strip tags
	if ($has_tags) {
	    # Strip full tags
	    $subtext =~ s/<[^>]*>//g;
	    # Check if clipped part of a tag
	    if ($subtext =~ s/<[^>]*\Z//) {
		my $gt = index($$str, '>', $pos);
		$pos = ($gt < 0) ? $html_len : ($gt+1);
	    }
	}

	# check for clipped entity reference
	if (($pos < $html_len) && ($subtext =~ /\&[^;]*\Z/)) {
	    my $semi = index($$str, ';', $pos);
	    if ($semi < 0) {
		# malformed entity reference
		$subtext .= substr($$str, $pos);
		$pos = $html_len;
	    } else {
		$subtext .= substr($$str, $pos, $semi-$pos+1);
		$pos = $semi+1;
	    }
	}

	# compute entity reference lengths to determine "real" character
	# count and not raw character count.
	while ($subtext =~ /(\&[^;]+);/g) {
	    $er_len += length($1);
	}

	$text .= $subtext;

	# done if we have enough
	$real_len = length($text)-$er_len;
	if ($real_len >= $len) {
	    last;
	}
	$sublen = $len - (length($text)-$er_len);
    }
    $text;
}

##---------------------------------------------------------------------------
##	Get an e-mail address from (HTML) $str.
##
sub extract_email_address {
    return ''  unless defined $_[0];
    scalar(MHonArc::RFC822::first_addr_spec(shift));
}

##---------------------------------------------------------------------------
##	Get an e-mail name from $str.
##
sub extract_email_name {
    my @tokens   = MHonArc::RFC822::tokenise(shift);
    my @bare     = ( );
    my $possible = undef;
    my $skip	 = 0;

    my $tok;
    foreach $tok (@tokens) {
	next  if $skip;
	if ($tok =~ /^"/) {   # Quoted string
	    $tok =~ s/^"//;  $tok =~ s/"$//;
            $tok =~ s/\\(.)/$1/g;
	    return $tok;
	}
	if ($tok =~ /^\(/) {  # Comment
	    $tok =~ s/^\(//; $tok =~ s/\)$//;
            $tok =~ s/\\(.)/$1/g;
	    return $tok;
	}
	if ($tok =~ /^<$/) {  # Address spec, skip
	    $skip = 1;
	    next;
	}
	if ($tok =~ /^>$/) {
	    $skip = 0;
	    next;
	}
	push(@bare, $tok);    # Bare name
    }

    my $str;
    if (@bare) {
	$str = join(' ', @bare);
	$str =~ s/@.*//;
	$str =~ s/^\s+//; $str =~ s/\s+$//;
	return $str;
    }
    $str = MHonArc::RFC822::first_addr_spec(@tokens);
    $str =~ s/@.*//;
    $str;
}

##---------------------------------------------------------------------------
##	Routine to sort messages
##
sub sort_messages {
    my($nosort, $subsort, $authsort, $revsort) = @_;
    $nosort   = $NOSORT    if !defined($nosort);
    $subsort  = $SUBSORT   if !defined($subsort);
    $authsort = $AUTHSORT  if !defined($authsort);
    $revsort  = $REVSORT   if !defined($revsort);

    if ($nosort) {
	## Process order
	if ($revsort) {
	    return sort { $IndexNum{$b} <=> $IndexNum{$a} } keys %Subject;
	} else {
	    return sort { $IndexNum{$a} <=> $IndexNum{$b} } keys %Subject;
	}

    } elsif ($subsort) {
	## Subject order
	my(%sub, $idx, $sub);
	use locale;
	eval {
	    my $hs = scalar(%Subject);  $hs =~ s|^[^/]+/||;
	    keys(%sub) = $hs;
	};
	while (($idx, $sub) = each(%Subject)) {
	    $sub = lc $sub;
	    1 while $sub =~ s/$SubReplyRxp//io;
	    $sub =~ s/$SubArtRxp//io;
	    $sub{$idx} = $sub;
	}
	if ($revsort) {
	    return sort { ($sub{$a} cmp $sub{$b}) ||
			  ($Time{$b} <=> $Time{$a})
			} keys %Subject;
	} else {
	    return sort { ($sub{$a} cmp $sub{$b}) ||
			  ($Time{$a} <=> $Time{$b})
			} keys %Subject;
	}
	
    } elsif ($authsort) {
	## Author order
	my(%from, $idx, $from);
	use locale;
	eval {
	    my $hs = scalar(%From);  $hs =~ s|^[^/]+/||;
	    keys(%from) = $hs;
	};
	if ($DoFromName && %FromName) {
	    while (($idx, $from) = each(%FromName)) {
		$from{$idx} = lc $from;
	    }
	} else {
	    while (($idx, $from) = each(%From)) {
		$from{$idx} = lc extract_email_name($from);
	    }
	}
	if ($revsort) {
	    return sort { ($from{$a} cmp $from{$b}) ||
			  ($Time{$b} <=> $Time{$a})
			} keys %Subject;
	} else {
	    return sort { ($from{$a} cmp $from{$b}) ||
			  ($Time{$a} <=> $Time{$a})
			} keys %Subject;
	}

    } else {
	## Date order
	if ($revsort) {
	    return sort { ($Time{$b} <=> $Time{$a})
			  || ($IndexNum{$b} <=> $IndexNum{$a})
			} keys %Subject;
	} else {
	    return sort { ($Time{$a} <=> $Time{$b})
			  || ($IndexNum{$a} <=> $IndexNum{$b})
			} keys %Subject;
	}

    }
}

##---------------------------------------------------------------------------
##	Message-sort routines for sort().
##
sub increase_index {
    (&get_time_from_index($a) <=> &get_time_from_index($b)) ||
	($IndexNum{$a} <=> $IndexNum{$b});
}

##---------------------------------------------------------------------------
##	Routine for formating a message number for use in filenames or links.
##
sub fmt_msgnum {
    sprintf("%05d", $_[0]);
}

##---------------------------------------------------------------------------
##	Routine to get filename of a message number.
##
sub msgnum_filename {
    my($fmtstr) = "$MsgPrefix%05d.$HtmlExt";
    $fmtstr .= ".gz"  if $GzipLinks;
    sprintf($fmtstr, $_[0]);
}

##---------------------------------------------------------------------------
##	Routine to get filename of an index
##
sub get_filename_from_index {
    &msgnum_filename($IndexNum{$_[0]});
}

##---------------------------------------------------------------------------
##	Routine to get time component from index
##
sub get_time_from_index {
    $Time{$_[0]} || (split(/$X/o, $_[0], 2))[0];
}

##---------------------------------------------------------------------------
##	Routine to get annotation of a message
##
sub get_note {
    my $index = shift;
    my $file = join($DIRSEP, get_note_dir(),
			     msgid_to_filename($Index2MsgId{$index}));
    if (!open(NOTEFILE, $file)) { return ""; }
    my $ret = join("", <NOTEFILE>);
    close NOTEFILE;
    $ret;
}

##---------------------------------------------------------------------------
##	Routine to determine if a message has an annotation
##
sub note_exists {
    my $index = shift;
    -e join($DIRSEP, get_note_dir(),
		     msgid_to_filename($Index2MsgId{$index}));
}

##---------------------------------------------------------------------------
##	Routine to get full pathname to annotation directory
##
sub get_note_dir {
    if (!OSis_absolute_path($NoteDir)) {
	return join($DIRSEP, $OUTDIR, $NoteDir);
    }
    $NoteDir;
}

##---------------------------------------------------------------------------
##	Routine to get lc author name from index
##
sub get_base_author {
    if ($DoFromName && %FromName) {
      return lc $FromName{$_[0]};
    }
    lc extract_email_name($From{$_[0]});
}

##---------------------------------------------------------------------------
##	Determine time from date.  Use %Zone for timezone offsets
##
sub get_time_from_date {
    my($mday, $mon, $yr, $hr, $min, $sec, $zone) = @_;
    my($time) = 0;

    $yr -= 1900  if $yr >= 1900;  # if given full 4 digit year
    $yr += 100   if $yr <= 37;    # in case of 2 digit years
    if (($yr < 70) || ($yr > 137)) {
	warn "Warning: Bad year (", $yr+1900, ") using current\n";
	$yr = (localtime(time))[5];
    }

    ## If $zone, grab gmt time, else grab local
    if ($zone) {
	$zone =~ tr/a-z/A-Z/;
	$time = &timegm($sec,$min,$hr,$mday,$mon,$yr);

	# try to modify time/date based on timezone
	OFFSET: {
	    # numeric timezone
	    if ($zone =~ /^[\+-]\d+$/) {
		$time -= &zone_offset_to_secs($zone);
		last OFFSET;
	    }
	    # Zone
	    if (defined($Zone{$zone})) {
		# timezone abbrev
		$time += &zone_offset_to_secs($Zone{$zone});
		last OFFSET;

	    }
	    # Zone[+-]DDDD
	    if ($zone =~ /^([A-Z]\w+)([\+-]\d+)$/) {
		$time -= &zone_offset_to_secs($2);
		if (defined($Zone{$1})) {
		    $time += &zone_offset_to_secs($Zone{$1});
		    last OFFSET;
		}
	    }
	    # undefined timezone
	    warn qq|Warning: Unrecognized time zone, "$zone"\n|;
	}

    } else {
	$time = &timelocal($sec,$min,$hr,$mday,$mon,$yr);
    }
    $time;
}

##---------------------------------------------------------------------------
##	Routine to check if time has expired.
##
sub expired_time {
    ($ExpireTime && (time - $_[0] > $ExpireTime)) ||
    ($_[0] < $ExpireDateTime);
}

##---------------------------------------------------------------------------
##      Get HTML tags for formatting message headers
##
sub get_header_tags {
    my($f) = shift;
    my($ftago, $ftagc, $tago, $tagc);
 
    ## Get user specified tags (this is one funcky looking code)
    $tag = (defined($HeadHeads{$f}) ?
            $HeadHeads{$f} : $HeadHeads{"-default-"});
    $ftag = (defined($HeadFields{$f}) ?
             $HeadFields{$f} : $HeadFields{"-default-"});
    if ($tag) { $tago = "<$tag>";  $tagc = "</$tag>"; }
    else { $tago = $tagc = ''; }
    if ($ftag) { $ftago = "<$ftag>";  $ftagc = "</$ftag>"; }
    else { $ftago = $ftagc = ''; }
 
    ($tago, $tagc, $ftago, $ftagc);
}

##---------------------------------------------------------------------------
##	Format message headers in HTML.
##	$html = htmlize_header($fields_hash_ref);
##
sub htmlize_header {
    my $fields = shift;
    my($key,
       $tago, $tagc,
       $ftago, $ftagc,
       $item,
       @array);
    my($tmp);

    my $mesg = "";
    my %hf = %$fields;
    foreach $item (@FieldOrder) {
	if ($item eq '-extra-') {
	    foreach $key (sort keys %hf) {
		next  if $FieldODefs{$key};
		next  if $key =~ /^x-mha-/;
		delete $hf{$key}, next  if &exclude_field($key);

		@array = @{$hf{$key}};
		foreach $tmp (@array) {
		    $tmp = $HFieldsList{$key} ? mlist_field_add_links($tmp) :
						&$MHeadCnvFunc($tmp);
		    $tmp = field_add_links($key, $tmp, $fields)
			unless $HFieldsAsIsList{$key};
		    ($tago, $tagc, $ftago, $ftagc) = get_header_tags($key);
		    $mesg .= join('', $LABELBEG,
				  $tago, htmlize(ucfirst($key)), $tagc,
				  $LABELEND,
				  $FLDBEG, $ftago, $tmp, $ftagc, $FLDEND,
				  "\n");
		}
		delete $hf{$key};
	    }
	} else {
	    if (!&exclude_field($item) && $hf{$item}) {
		@array = @{$hf{$item}};
		foreach $tmp (@array) {
		    $tmp = $HFieldsList{$item} ? mlist_field_add_links($tmp) :
						 &$MHeadCnvFunc($tmp);
		    $tmp = field_add_links($item, $tmp, $fields)
			unless $HFieldsAsIsList{$item};
		    ($tago, $tagc, $ftago, $ftagc) = &get_header_tags($item);
		    $mesg .= join('', $LABELBEG,
				  $tago, htmlize(ucfirst($item)), $tagc,
				  $LABELEND,
				  $FLDBEG, $ftago, $tmp, $ftagc, $FLDEND,
				  "\n");
		}
	    }
	    delete $hf{$item};
	}
    }
    if ($mesg) { $mesg = $FIELDSBEG . $mesg . $FIELDSEND; }
    $mesg;
}

##---------------------------------------------------------------------------

sub mlist_field_add_links {
    my $txt	= shift;
    my $ret	= "";
    local($_);
    foreach (split(/(<[^<>]+>)/, $txt)) {
	if (/^<\w+:/) {
	    chop; substr($_, 0, 1) = "";
	    $ret .= qq|&lt;<a href="$_">$_</a>&gt;|;
	} else {
	    $ret .= &$MHeadCnvFunc($_);
	}
    }
    $ret;
}

##---------------------------------------------------------------------------
##	Routine to add mailto/news links to a message header string.
##
sub field_add_links {
    my $label = lc shift;
    my $fld_text = shift;
    my $fields	 = shift;

    LBLSW: {
	if (!$NONEWS && ($label eq 'newsgroup' || $label eq 'newsgroups')) {
	    $fld_text = newsurl($fld_text, $fields->{'x-mha-message-id'});
	    last LBLSW;
	}
	if (!$NOMAILTO) {
	    $fld_text =~ s{($HAddrExp)}
			  {&mailUrl($1, $fields->{'x-mha-message-id'},
					$fields->{'x-mha-subject'},
					$fields->{'x-mha-from'});
			  }gexo;
	} else {
	    $fld_text =~ s{($HAddrExp)}
			  {&htmlize(&rewrite_address($1))
			  }gexo;
	}
	last LBLSW;
    }
    $fld_text;
}


##---------------------------------------------------------------------------
##	Routine to add news links of newsgroups names
##
sub newsurl {
    my $str     = shift;
    my $msgid_u = urlize(shift);
    my $h = "";

    local $_;
    if ($str =~ s/^([^:]*:\s*)//) {
	$h = $1;
    }
    $str =~ s/[\s<>]//g;
    my @groups = split(/,/, $str);
    my $group;
    foreach $group (@groups) {
	my $url     = $NewsUrl;
	my $group_u = urlize($group);
	$url =~ s/\$NEWSGROUP(?::U)?\$/$group_u/g;
	$url =~ s/\$MSGID(?::U)?\$/$msgid_u/g;
	$group = qq{<a href="$url">$group</a>};
    }
    $h . join(', ', @groups);	# Rejoin string
}

##---------------------------------------------------------------------------
##	$html = mailUrl($email_addr, $msgid, $subject, $from);
##
sub mailUrl {
    my $eaddr = shift || '';
    my $msgid = shift || '';
    my $sub = shift || '';
    my $from = shift || '';
    dehtmlize(\$eaddr);

    local $_;
    my($url) = ($MAILTOURL);
    my($to) = (&urlize($eaddr));
    my($toname, $todomain) = map { urlize($_) } split(/@/,$eaddr,2);
    my($froml, $msgidl) = (&urlize($from), &urlize($msgid));
    my($fromaddrl) = (&extract_email_address($from));
    my($faddrnamel, $faddrdomainl) = map { urlize($_) } split(/@/,$fromaddrl,2);
    $fromaddrl = &urlize($fromaddrl);
    my($subjectl);

    # Add "Re:" to subject if not present
    if ($sub !~ /^$SubReplyRxp/io) {
	$subjectl = 'Re:%20' . &urlize($sub);
    } else {
	$subjectl = &urlize($sub);
    }
    $url =~ s/\$FROM\$/$froml/g;
    $url =~ s/\$FROMADDR\$/$fromaddrl/g;
    $url =~ s/\$FROMADDRNAME\$/$faddrnamel/g;
    $url =~ s/\$FROMADDRDOMAIN\$/$faddrdomainl/g;
    $url =~ s/\$MSGID\$/$msgidl/g;
    $url =~ s/\$SUBJECT(?:NA)?\$/$subjectl/g;
    $url =~ s/\$TO\$/$to/g;
    $url =~ s/\$TOADDRNAME\$/$toname/g;
    $url =~ s/\$TOADDRDOMAIN\$/$todomain/g;
    $url =~ s/\$ADDR\$/$to/g;
    qq|<a href="$url">| . &htmlize(&rewrite_address($eaddr)) . q|</a>|;
}

##---------------------------------------------------------------------------##
##	Routine to parse variable definitions in a string.  The
##	function returns a list of variable/value pairs.  The format of
##	the string is similiar to attribute specification lists in
##	SGML, but NAMEs are any non-whitespace character.
##
sub parse_vardef_str {
    my($org) = shift;
    my($lower) = shift;
    my(%hash) = ();
    my($str, $q, $var, $value);

    ($str = $org) =~ s/^\s+//;
    while ($str =~ s/^([^=\s]+)\s*=\s*//) {
	$var = $1;
	if ($str =~ s/^(['"])//) {
	    $q = $1;
	    if (!($q eq "'" ? $str =~ s/^([^']*)'// :
			      $str =~ s/^([^"]*)"//)) {
		warn "Warning: Unclosed quote in: $org\n";
		return ();
	    }
	    $value = $1;

	} else {
	    if ($str =~ s/^(\S+)//) {
		$value = $1;
	    } else {
		warn "Warning: No value after $var in: $org\n";
		return ();
	    }
	}
	$str =~ s/^\s+//;
	$hash{$lower? lc($var): $var} = $value;
    }
    if ($str =~ /\S/) {
	warn "Warning: Trailing characters in: $org\n";
    }
    %hash;
}

##---------------------------------------------------------------------------##

sub msgid_to_filename {
    my $msgid = shift;
    if ($VMS) {
	$msgid =~ s/([^\w\-])/sprintf("=%02X",unpack("C",$1))/geo;
    } else {
	$msgid =~ s/([^\w.\-\@])/sprintf("=%02X",unpack("C",$1))/geo;
    }
    $msgid;
}

##---------------------------------------------------------------------------##
##	Check if new follow up list for a message is different from
##	old follow up list.
##
sub is_follow_ups_diff {
    my $f	= $Follow{$_[0]};
    my $o	= $FollowOld{$_[0]};
    if (defined($f) && defined($o)) {
	return 1  unless @$f == @$o;
	local $^W = 0;
	my $i;
	for ($i=0; $i < @$f; ++$i) {
	    return 1  if $f->[$i] ne $o->[$i];
	}
	return 0;
    }
    return (defined($f) || defined($o));
}

##---------------------------------------------------------------------------##
##	Retrieve icon URL for specified content-type.
##
sub get_icon_url {
    my $ctype = shift;
    my $icon = $Icons{$ctype};
    ICON: {
	last ICON  if defined $icon;
	if ($ctype =~ s|/.*||) {
	  $ctype .= '/*';
	  $icon = $Icons{$ctype};
	  last ICON  if defined $icon;
	}
	$icon = $Icons{'*/*'} || $Icons{'unknown'};
    }
    if (!defined($icon)) {
	return (undef, undef, undef);
    }
    if ($icon =~ s/\[(\d+)x(\d+)\]//) {
	return ($IconURLPrefix.$icon, $1, $2);
    }
    ($IconURLPrefix.$icon, undef, undef);
}

##---------------------------------------------------------------------------##

sub log_mesg {
    my $fh	= shift;
    my $doDate	= shift;

    if ($doDate) {
	my($sec,$min,$hour,$mday,$mon,$year) = localtime(time);
	print $fh sprintf("[%4d-%02d-%02d %02d:%02d:%02d] ",
			  $year+1900, $mon+1, $mday, $hour, $min, $sec);
    }
    print $fh @_;
}

##---------------------------------------------------------------------------##

sub dump_hash {
    my $fh = shift;
    my $h = shift;
    local $_;
    foreach (sort keys %$h) {
	print $fh "$_ => ", $h->{$_}, "\n";
    }
}

##---------------------------------------------------------------------------##
1;