#!/usr/bin/perl -ws use strict; $|++; # stripmime - attempt to strip any HTML alt and attachments from MIME email # Steve Kinzler, kinzler@cs.indiana.edu, Jun 09 # http://www.cs.indiana.edu/~kinzler/home.html#other # derived from a script by Randal L Schwartz, merlyn@stonehenge.com, Jan 01 # http://www.perlmonks.org/index.pl?node_id=53404 # This is safe for all emails, but we suggest restricting to those with # /boundary/ in the header's Content-Type for efficiency. This script # assumes some conventional MIME structures and may not work as desired # with others, just possibly leaving things unstripped. It also assumes # all lines beginning with "From " after a blank line are mailbox envelopes. my $usage = "usage: $0 [ -t | -a ] [ -r ] [ -b | -f ] [ file ... ] -t strip any HTML alternative in the usual places (default) -a also strip any attachments in the usual places -r don't strip CRs from CR-NL line endings in the plaintext -b take input as a mbox and filter all messages -f take input as a mbox and filter first message\n"; die $usage if $::h; 1 if $::h || $::t || $::t || $::a || $::b || $::r; use MIME::Parser; use MIME::Entity; my $parser = MIME::Parser->new; $parser->output_to_core(1); $parser->tmp_to_core(1); my @msgs = (); if ($::b || $::f) { @msgs = split(/\n\nFrom /, join('', <>)); for (0 .. $#msgs - 1) { $msgs[$_] .= "\n\n" } for (1 .. $#msgs) { $msgs[$_] = 'From ' . $msgs[$_] } } else { @msgs = (join('', <>)); } my $didone = 0; foreach (@msgs) { print, next if $::f and $didone++; my @msg = split(/\n/, $_ . 'EOMsg'); pop @msg; for (0 .. $#msg) { $msg[$_] .= "\n" } exit 0 if ! @msg; my $envT = (@msg and $msg[0] =~ /^From /) ? shift @msg : ''; warn("$0: passing thru (not a mail message)\n"), print($envT, @msg), exit 0 if ! @msg or $msg[0] !~ /^([-\w]+:\s|$)/; my $envB = ($envT and $msg[$#msg] =~ /^$/) ? pop @msg : ''; my $ent = eval { $parser->parse_data(join('', @msg)) }; warn($@), print($envT, @msg, $envB), exit 1 if $@; @msg = (); &striphtml($ent); &striphtml($ent->parts(0)) and $ent->sync_headers(Nonstandard => 'ERASE') if &mixed($ent->effective_type); &stripattach($ent) if $::a; print $envT; $ent->print; print $envB; } ############################################################################### sub mixed { $_[0] eq 'multipart/mixed' or $_[0] eq 'multipart/related'; } sub striphtml { my $ent = $_[0]; return 0 unless $ent->effective_type eq 'multipart/alternative' and $ent->parts == 2 and $ent->parts(0)->effective_type eq 'text/plain' and $ent->parts(1)->effective_type eq 'text/html'; &strip($ent); 1; } sub stripattach { my $ent = $_[0]; return 0 unless &mixed($ent->effective_type) and $ent->parts > 1 and $ent->parts(0)->effective_type eq 'text/plain'; &strip($ent); 1; } sub strip { my $ent = $_[0]; my $txt = $ent->parts(0)->bodyhandle->as_string; $txt =~ s/\r\n/\n/g unless $::r; $ent->parts([MIME::Entity->build( # Data => $ent->parts(0)->body_as_string # bug in orig # . "\n\n[[HTML alternate version deleted]]\n"); # don't want Data => $txt, Encoding => -SUGGEST)]); $ent->make_singlepart; $ent->sync_headers(Nonstandard => 'ERASE'); }