#!/usr/bin/perl -w
# $Id: t-prot,v 1.51 2002/03/23 10:47:32 jochen Exp $

require 5.005;
use strict;
use Getopt::Mixed qw(nextOption);
use vars qw(
	$VER $REV $REL
	$EX_OK $EX_USAGE $EX_DATAERR $EX_UNAVAILABLE $EX_BOUNCE
	$ad $ads $boun $cr $diff $elli $footers $hdrs $indent $lsig $maxsig
	$mda $ml $ms $mua $ofile $sendmail $sig $sysl $trad $trsp
);


# Version info
$VER 			= '0.54';
$REV 			= '';
$REL 			= q$Revision: 1.51 $; chop($REL);
# From <sysexits.h>
# (you might have to adjust those if not using GNU libc)
$EX_OK 			=  0;
$EX_USAGE 		= 64;
$EX_DATAERR		= 65;
$EX_UNAVAILABLE = 69;
$EX_BOUNCE 		= $EX_UNAVAILABLE;
# Please adjust these vals to your needs:
$maxsig 		= 4;	# max. valid signature length
$indent 		= '>';	# Indent string, regexp to identify a quoted line
$sendmail		= '/usr/sbin/sendmail -oi'; # MTA expecting mail on STDIN
$boun 			= "Blocked by $0: This user does not accept TOFUed email. Please see <http://learn.to/edit_messages/> and <http://www.escape.de/users/tolot/mutt/> for more info. Have a nice day!\n";
$ofile			= '-';	# use STDOUT if nothing is specified
# end of user adjusted vals


# help(): print help text and exit with appropriate exit code
sub help {
    print "Usage: $0 [options] 
  -a              remove ad footers; requires -A
  -A=DIRECTORY    ad footer directory, treat ad footers as signature
  -c              merge multiple blank lines
  -d, --debug     print notice to syslog when bouncing; requires -p
  --diff          tolerate diffs appended *after* the signature
  -e              force ellipsis for excessive punctuation
  -h, --help      show this short help and exit
  -i=INFILE       file to be read; '-' for STDIN (default)
  -L=DIRECTORY    mailling list footer directory, treat mailing list
                  footers as signature
  -l              delete mailing list footer; requires -L
  -M, --mua=MUA   turn on special treatment for some mail user agents
  -m              delete MS style TOFU; careful: might be too agressive
  -o=OUTFILE      file to be written to; '-' for STDOUT (default)
  -P=MESSAGE      user defined bounce message; requires -p
  -p=ADDRESS      redirect to ADDRESS if no TOFU was found
  -r              delete mail header lines
  -S[=n]          supress signatures with more than n lines; 
                  default is $maxsig if n not specified
  -s              delete signature
  -t              delete traditional style TOFU
  -v, --version   show version string and exit
  -w              delete trailing whitespaces\n";
    exit($EX_USAGE);
}

# version(): print version info and exit with appropriate exit code
sub version {
    print "$0 v$VER$REV ($REL), Jochen Striepe <t-prot\@tolot.escape.de>
Get the latest version at <http://www.escape.de/users/tolot/mutt/>\n";
    exit($EX_OK);
}

# remove_footers(): remove any trailing appearance of footers contained
# in the given directory.
sub remove_footers {
	my $L = shift;		# array of message lines
	my $S = shift;		# array to store removed lines in
	my $F = shift;		# footers dir name
	my $O = shift;		# remove only one footer?

	if ($F && scalar(@$L)) {
	    opendir(DIR, $F) || die "Could not open $F: $!";
    	my @feet = grep { /^[^.]/ && -f "$F/$_" } readdir DIR;
	    closedir DIR;

	    foreach my $f (@feet) {
    		open(IN, "$F/$f") || die "Could not open $F/$f: $!";
	        my @l = <IN>;
    	    close IN;

        	while (scalar(@l)<=scalar(@$L)) {
            	my $y = 0;
	            for(my $x=1; $x<=scalar(@l); $x++) {
    	            chomp($l[scalar(@l)-$x]);
        	        if (index($$L[scalar(@$L)-$x], $l[scalar(@l)-$x])!=0) { 
						$y = 1; 
					}
	            }
    	        if (!$y) {
					unshift(@$S, @$L[$#$L-$#l..$#$L]);
					splice(@$L, $#$L-$#l);
					while (scalar(@$L) && $$L[$#$L] =~ /^\s*$/) {
    	                unshift(@$S, pop(@$L));
					}
					if ($O) { last; }
	            }
				else { last; }
        	}
	    }
	}
}

# write_msg(): output
sub write_msg {
	my $O = shift;
	my $l;

	open(OUT, $O) || die "Could not open $O: $!";
	while (scalar(@_)) {
		$l = shift;
		if (defined $l) {
			$^W = 0;
			print OUT @$l;
			$^W = 1;
		}
	}
	close OUT;
}

# process_msg(): This one proc does *everything* what has to be done with
# the lines of the message
sub process_msg {
	my $lines = shift;

	my ($j, $x, $verb) = (0, 0, 0);
	my (@ads, @hdr, @bo1, @bo2, @ftr, @sig, @vrb, @att) = 
		((), (), (), (), (), (), (), (), ());

	# First, remove and store lines we might need later...
	# Remove headers:
	for ($x=0; $x<$#$lines; $x++) { if (@$lines[$x] =~ /^$/) { last; }; }
	@hdr = @$lines[0..$x];
	splice(@$lines, 0, $x+1);

	# See if we have a multipart content type. If yes, see if it is already
	# ripped (e.g. by mutt(1)), otherwise only leave the first part if it
	# is plain text (if not, we are done - non-text messages are not our
	# business).
	if (lc($mua) ne 'mutt') { 
		for ($x=0; $x<scalar(@hdr); $x++) {
			if ($hdr[$x] =~ /^Content-Type:\s+(.*)$/i) {
				my $foo = $1;

				if ($foo =~ /^multipart\//i) {
					undef $foo;

					if ($hdr[$x] =~ /\Wboundary="([^"]+)"/i) { $foo = $1; }
					else { 
						for (my $z=1; $x+$z<@hdr && $hdr[$x+$z]=~/^\s/; $z++) {
							if ($hdr[$x] =~ /\Wboundary="?([\S]+)"?$/i) { 
								$foo = $1;
								last;
							}
						}
					}

					if (defined $foo) {
						for (my $x=0; $x<scalar(@$lines); $x++) {
							if (index($$lines[$x], '--'.$foo)!=0) { next; }

							my $bar = 'text/plain';
	    		            for ($x++; $x<@$lines && $$lines[$x]!~/^$/; $x++)
							{
	        		            if ($$lines[$x] =~ /^Content-Type:\s+(.*)$/i) { 
    	        		            $bar = $1;
								}
							}
							if ($x>=scalar(@$lines)) { exit($EX_DATAERR); }

							if ($bar =~ /^text\/plain/i) {
								my $z;
								for ($z=1; $x+$z<@$lines; $z++) {
									if (index($$lines[$x+$z], '--'.$foo)==0) {
										last;
									}
								}
								if ($x+$z>=scalar(@$lines)) { exit($EX_DATAERR); }

								@bo2 = @$lines[$x+$z..$#$lines];
								splice(@$lines, $x+$z);
								if ($$lines[$#$lines] =~ /^\s*$/) {
									unshift(@bo2, pop @$lines);
								}
								@bo1 = @$lines[0..$x];
								splice(@$lines, 0, $x+1);
								last;
							}
							else { 
								write_msg(($mda?"|$sendmail $mda":">$ofile"),
									($hdrs?undef:\@hdr), $lines);
								exit;
							}
						}
					}
				}
				last;
			}
		} 
	}


	# Protect verbatims:
	$verb = 0;
	for ($x=0; $x<scalar(@$lines); $x++) {
	    if ($$lines[$x] =~ /^\s*#v([+-])\s*$/) { 
			$verb = $1 eq '+' ? 1 : 0; 
		}
    	$vrb[$x] = $verb;
	}


	if (lc($mua) eq 'mutt') {
		# Remove all but the first attachment (if this is text/plain)
		# mutt did introduce (bah!). Remember, all this ugliness could
		# be replaced with a proper and clean edit_filter patch in 
		# mutt(1) itself...
		for ($x=0; $x<scalar(@$lines); $x++) {
		    if ($vrb[$x]) { next; }
			# The following regexp's are quite ugly because for most users
			# these lines are coloured using termcap... (bah!)
			if (($$lines[$x] =~ /^[^>[]*\[-- Attachment #(\d+)(: .*)? --\]\s*$/ &&
					(($1 ne '1') ||
					($x<scalar(@$lines) &&
						$$lines[$x+1] !~ /^[^>[]*\[-- Type: text\/plain/))) ||
				($$lines[$x] =~ /^[^>[]*\[-- End of .* data --\]\s*$/))
			{ 
				@att = @$lines[$x..$#$lines];
				splice(@$lines, $x);
				if (scalar(@$lines) && $$lines[$#$lines] =~ /^\s*$/) { 
					unshift(@att, pop(@$lines));
				}
				last;
			}
		}

		# Pipe message/rfc822 parts to another instance of process_msg()
		# for further processing.
		# Please note that we cannot see what a hierarchy the original
		# message had -- if there were message/rfc822 parts within other
		# message/rfc822 parts constellations can occur which we cannot
		# resolve. Therefore we simply do not even try to be smart. This
		# should work for most situations:
		if (scalar(@att)) {
			for ($x=0; $x<$#att; $x++) {
	            if ($vrb[scalar(@$lines)+$x]) { next; }
	            # The following regexp is quite ugly because for most
				# users the line is coloured using termcap... (bah!)
	            if ($att[$x]=~/^[^>[]*\[-- Attachment #\d+(: .*)? --\]\s*$/ &&
					$att[$x+1] =~ /^[^>[]*\[-- Type: message\/rfc822/)
				{
					$x += 2;
					while ($att[$x] !~ /^\s*$/) { $x++; }
					$x++;

					my @tmp = @att[$x..$#att];
					process_msg(\@tmp);
					splice(@att, $x, scalar(@att)-$x, @tmp);
				}
			}
		}
	}

	# Remove ML footers:
	remove_footers($lines, \@ftr, $footers, undef);

	# Remove ad footers:
	remove_footers($lines, \@ads, $ads, undef);

	# Remove signature:
	if (scalar(@$lines)) { 
		for ($x=0; $x<scalar(@$lines); $x++) {
			if ((!$vrb[$x]) && $$lines[$x] =~ /^-- $/) {
				if ($diff) {
					for (my $i=1; $x+$i+1<scalar(@$lines); $i++) {
						if ($$lines[$x+$i] =~ /^\-{3}\ .+/ &&
							$$lines[$x+$i+1] =~ /^\+{3}\ .+/)
						{
							$sig = 0;
							@sig = @$lines[$x..$#$lines];
							splice(@$lines, $x);
							last;
						}
					}
					if (scalar(@sig)) { last; }
				}

				if ($sig || ($lsig && ($#$lines-$x>$lsig))) {
					if ($lsig && !$sig) {
						push(@sig, "[---=| Overlong signature removed by $0: " . 
							(scalar(@$lines)-$x) . " lines snipped |=---]\n");
					}
					splice(@$lines, $x);
				}
				elsif ($#$lines-$x<=($lsig?$lsig:$maxsig)) {
					@sig = @$lines[$x..$#$lines];
					splice(@$lines, $x);
				}
				last;
			}
		}
	}

	# Now care about TOFU.
	# One common mispractice is M$ style TOFU:
	if ($ms) {
    	# bloat this array if you want more internationalization:
	    my @tofu = ('Original Message',
    	            'Ursprngliche Nachricht',
        	        'Ursprungliche Nachricht',
            	    'Mensagem original');

	    DONE: for ($x=0; $x<scalar(@$lines); $x++) { 
    	    if (!$vrb[$x]) {
	            foreach my $tmp (@tofu) {
    	            if ($$lines[$x] =~ /^-+\s?$tmp\s?-+\s*$/) { 
        	            $x++; 
            	        last DONE; 
                	}
	            }
    	    }
	    }

		$j = scalar(@$lines)-$x;
		splice(@$lines, $x); 
	}

	# Nothing? Then try traditional TOFU:
	if ($trad && (!$j) && !$vrb[$#$lines]) {
		if (scalar(@$lines) && $$lines[$#$lines] =~ /^\s*$/) { 
			unshift(@sig, pop(@$lines));
	    }
		while (scalar(@$lines) && $$lines[$#$lines] =~ /^\s*$/) { 
			pop(@$lines);
	    }
		while (scalar(@$lines) && $$lines[$#$lines] =~ /^$indent/) {
			$j++;
			pop(@$lines);
		}
	}

	# OK, if we found TOFU, we will leave a message that we were here...
	if ($j) { 
		# make sendmail bounce if we shall be picky 
		# and indeed found something:
		if ($mda) { 
			print STDERR $boun;

			if ($sysl) {
				eval { require Sys::Syslog; }; 
				if ($@) { warn $@; } else {
					Sys::Syslog::setlogsock('unix');
					Sys::Syslog::openlog("$0[$$]", 'pid', 'mail');
					Sys::Syslog::syslog('debug', 'bounced message %s', $hdr[0]);
					Sys::Syslog::closelog();
				}
			}

			exit $EX_BOUNCE;
		}

    	push(@$lines, "[---=| TOFU protection by $0: $j lines snipped |=---]\n");
	}


	# Care for trailing whitespaces:
	if ($trsp) {
    	for ($x=0; $x<scalar(@$lines); $x++) { 
			if (!$vrb[$x]) { $$lines[$x] =~ s/[\ \t]+$//; }
		}
	}

	# Care for punctuation abuse:
	if ($elli) {
    	for ($x=0; $x<scalar(@$lines); $x++) { 
        	if (!$vrb[$x]) { $$lines[$x] =~ s/([.?!])(\1{2})\1+/$1 . $2/eg; }
	    }
	}

	# (Nearly) at last care for multiple blank lines. (Do not do this
	# earlier -- the way it is done right now would screw up the verbatim
	# list)
	if ($cr) {
    	my $t = 0;
	    for ($x=scalar(@$lines)-1; $x>=0; $x--) {
    	    if ((!$vrb[$x]) && $$lines[$x] =~ /^\s*$/) { 
        	    if ($t<2) { $t++; } else { splice(@$lines, $x, 1); }
	        }
			else { $t = 0; }
	    }
	}

	# Everything changing the body is done now. Time to fix the line count
	# header so naive clients do not get confused. Just to be sure, append
	# the old line count to X-headers.
	my $l = scalar(@bo1) + scalar(@$lines) + scalar(@att) + scalar(@bo2) +
				(!$sig?scalar(@sig):0) + (!$ml?scalar(@ftr):0) + 
				(!$ad?scalar(@ads):0);
	for ($x=0; $x<scalar(@hdr); $x++) {
		if ($hdr[$x] =~ s/^(Lines:\s+)(\d+)(\s*)?$/$1 . $l . ($3?$3:'')/e &&
			$2!=$l) 
		{ 
			$hdr[$#hdr] = "X-Old-Lines: $2\n";
			push(@hdr, "\n");
		}
	}

	# Finally, before leaving we put everything back in right order.
	unshift(@$lines, (!$hdrs?@hdr:()), @bo1);
	push(@$lines, (!$sig?@sig:()), (!$ad?@ads:()), (!$ml?@ftr:()), @att,
		@bo2);
}


# command line switches
($ad, $ads, $cr, $sysl, $diff, $elli, $footers, $ml, $ms, $mda, $mua,
	$hdrs, $lsig, $sig, $trad, $trsp) = 
	(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
my $ifile   = '-';	# use STDIN if nothing specified

# get command line params:
$0 =~ s!^.*/!!;
Getopt::Mixed::init('a A=s c d e h i=s L=s l m M=s o=s P=s p=s r S:i'.
	' s t v w debug>d diff help>h mua>M version>v');
while (my ($opt, $val, $pretty) = nextOption()) {
	if    ($opt eq 'a') 	{ $ad = 1; }
	elsif ($opt eq 'A') 	{ $ads = $val; }
    elsif ($opt eq 'c') 	{ $cr = 1; }
    elsif ($opt eq 'd') 	{ $sysl = 1; }
    elsif ($opt eq 'diff') 	{ $diff = 1; }
    elsif ($opt eq 'e') 	{ $elli = 1; }
    elsif ($opt eq 'i') 	{ $ifile = $val; }
    elsif ($opt eq 'L') 	{ $footers = $val; }
    elsif ($opt eq 'l') 	{ $ml = 1; }
    elsif ($opt eq 'm') 	{ $ms = 1; }
    elsif ($opt eq 'M') 	{ $mua = $val; }
    elsif ($opt eq 'o') 	{ $ofile = $val; }
    elsif ($opt eq 'P') 	{ $boun = $val; }
    elsif ($opt eq 'p') 	{ $mda = $val; }
    elsif ($opt eq 'r') 	{ $hdrs = 1; }
    elsif ($opt eq 'S') 	{ $lsig = $val ? $val : $maxsig; }
    elsif ($opt eq 's') 	{ $sig = 1; }
    elsif ($opt eq 't') 	{ $trad = 1; }
    elsif ($opt eq 'v') 	{ version(); }
    elsif ($opt eq 'w') 	{ $trsp = 1; }
	else { help(); }
}
Getopt::Mixed::cleanup();
if (($ml && $footers eq '')||($ad && $ads eq '')) { help(); }


# Read message:
open(IN, $ifile) || die "Could not open $ifile: $!";
my @message = <IN>;
close IN;

# this should be self-explanatory:
process_msg(\@message);

# Finally, print clean lines:
write_msg(($mda?"|$sendmail $mda":">$ofile"), \@message);

# eof
