Date: Wed, 22 Jun 94 03:16:43 GMT From: dolan AT fnoc DOT navy DOT mil (Kent Dolan) To: djgpp AT sun DOT soe DOT clarkson DOT edu Subject: email header filter for overwhelmed mail echo byte junkies For those who insist on keeping every valuable word of the djgpp email echo, the appended nawk script sucked about half the volume out of a megabyte of it without losing anything worth having. Not recommended for Mime mail that is really mixed mode, but fine for chat email. Usage: nawk -f name_you_give_script bulky_email_archive > less_bulky_version Before pitching the old version and replacing it with the new version, do a mail -f less_bulky_version to make sure you haven't broken something your mailer needs to find the boundaries between the files. This script occassionally throws away useful bits of email, but not often enough to be worth making robust. The robust version would have another mode wrapped around the current FollowOn mode, that finds a line starting with "From ", processes the header as shown until a blank line is encountered, and then prints everything until the next line starting with "From ". This is left as an exercise for the reader. The value of this version is all the various kinds of trash in a header that this script knows about, culled from about 4 megs of saved email. Xanthian. -- Kent, the man from xanth. Kent Paul Dolan. ---------------------------8<----cut here---->8----------------------------- BEGIN { FollowOn = "F" } FollowOn == "T" { if ((NF == 0) || (index($0,$1) == 1)) { FollowOn = "F" } else { next } } $1 == "Received:" { FollowOn = "T" next } $1 == "Original-Received:" { FollowOn = "T" next } $1 == "X-Disclaimer:" { FollowOn = "T" next } $1 == "References:" { FollowOn = "T" next } $1 == "Content-transfer-encoding:" { next } $1 == "Followup-To:" { next } $1 == "Forwarded-By:" { next } $1 == "Message-ID:" { next } $1 == "Message-id:" { next } $1 == "Originally-From:" { next } $1 == "Originally-Message-Id:" { next } $1 == "Resent-By:" { next } $1 == "Resent-Date:" { next } $1 == "Resent-From:" { next } $1 == "Resent-Message-Id:" { next } $1 == "Resent-To:" { next } $1 == "Resent-message-id:" { next } $1 == "Resent-to:" { next } $1 == "Return-path:" { next } $1 == "X-:" { next } $1 == "X-Antipastobozoticataclysm:" { next } $1 == "X-Cc:" { next } $1 == "X-Envelope-From:" { next } $1 == "X-Envelope-to:" { next } $1 == "X-Followup-To:" { next } $1 == "X-Mailed-To:" { next } $1 == "X-News:" { next } $1 == "X-Nsa-Fodder:" { next } $1 == "X-Origin:" { next } $1 == "X-Pcmail-Return-Receipt-Requested:" { next } $1 == "X-Phone:" { next } $1 == "X-Phones:" { next } $1 == "X-Punted-By:" { next } $1 == "X-Software:" { next } $1 == "X-VMS-Cc:" { next } $1 == "X-VMS-To:" { next } $1 == "X-Vms-Mail-To:" { next } $1 == "X-Vmsmail-To:" { next } $1 == "XDate:" { next } $1 == "XFrom:" { next } $1 == "XIllegal-Object:" { next } $1 == "XMessage-Id:" { next } $1 == "XReceived:" { next } $1 == "XStatus:" { next } $1 == "XSubject:" { next } $1 == "XTo:" { next } $1 == "Message-Id:" { next } $1 == "Lines:" { next } $1 == "Path:" { next } $1 == "Return-Path:" { next } $1 == "X-Envelope-To:" { next } $1 == "X-Vms-To:" { next } $1 == "X-Mts:" { next } $1 == "Mailer:" { next } $1 == "Posted-Date:" { next } $1 == "In-Reply-To:" { next } $1 == "Mime-Version:" { next } $1 == "Content-Type:" { next } $1 == "Content-Transfer-Encoding:" { next } $1 == "Content-Length:" { next } $1 == "X-Mailer:" { next } $1 == "X-Sender:" { next } $1 == "Encoding:" { next } $1 == "Pp-Warning:" { next } $1 == "Reply-To:" { next } $1 == "Sender:" { next } $1 == "Xref:" { next } $1 == "X-BBS-Msg-Type:" { next } $1 == "X-Date:" { next } $1 == "X-From:" { next } $1 == "X-Md4-Signature:" { next } $1 == "X-NEWS:" { next } $1 == "X-Newsgroups:" { next } $1 == "X-Newsreader:" { next } $1 == "X-Organization:" { next } $1 == "X-Posted-From:" { next } $1 == "X-Posted-Through:" { next } $1 == "X-Posting-Agent-by:" { next } $1 == "X-Posting-Agent:" { next } $1 == "X-Posting-Software:" { next } $1 == "X-To:" { next } $1 == "X-Unparsable-Date:" { next } $1 == "X-UserAgent:" { next } $1 == "X-XXDate:" { next } $1 == "Cc:" { next } $1 == "Nntp-Posting-Host:" { next } $1 == "NNTP-Posting-Host:" { next } $1 == "In-reply-to:" { next } $1 == "Originator:" { next } $1 == "Distribution:" { next } $1 == "Apparently-To:" { next } $1 == "Content-Identifier:" { next } $1 == "Errors-To:" { next } $1 == "Priority:" { next } $1 == "Received-Date:" { next } $1 == "Return-Receipt-To:" { next } $1 == "S-Mail:" { next } $1 == "X-Authentic-Sender:" { next } $1 == "X-Char-Esc:" { next } $1 == "X-Charset:" { next } $1 == "X-Face:" { next } $1 == "X-Hpvue$Revision:" { next } $1 == "X-Pmrqc:" { next } $1 == "X-Popmail-Charset:" { next } $1 == "X-Sun-Charset:" { next } $1 == "X-Vms-Cc:" { next } $1 == "X-Vue-Mime-Level:" { next } $1 == "X400-Content-Type:" { next } $1 == "X400-Mts-Identifier:" { next } $1 == "X400-Originator:" { next } $1 == "X400-Received:" { next } $1 == "X400-Recipients:" { next } { print }