#!/usr/bin/perl -w use strict; ################################################################ my $Version = "April 11, 2003. (c) Ivo Welch. Released under the GPL.\n"; my $homedir= $ENV{HOME}; # you may need to set this! my $configfile= "$homedir/.badenclosures.config"; # you may have to set this. my $semaphorefile= "$homedir/.badenclosures.semaphore"; # also the log file my $notagainfile= "$homedir/.badenclosures.notagain"; # to avoid sending multiple warnings; my @avoidlist= ( "application\/msword", "application\/msexcel", "application\/mspowerpoint", "application\/octet-stream; name=\".*\.xls\"", "application\/octet-stream; name=\".*\.doc\"", "application\/octet-stream; name=\".*\.ppt\"" ); ################################################################ =pod =head1 NAME badenclosures -- Notify Senders of MS Office Enclosures. =head1 SYNOPSIS This is an imap and smtp client that checks unread emails for the presence of MS office documents, and tells the sender to resend them in plain text/html/pdf. The program tries to avoid taxing the imap server too much by looking only at new messages and remembering what messages it has already checked. =head1 USAGE 1: install the CPAN modules Mail::IMAPClient and Net::SMTP . 2: make sure your home directory is correct. 3: run as "perl badenclosures.pl". if you have not set the config file, the program will write a template for you. 4: run as "nohup perl badenclosures.pl 5 &" to run this as a user daemon every 5 minutes. =head1 HOMEPAGE http://welch.econ.brown.edu/computers/ hopefully, this website always has the latest version. there will be no support for this program, although I will welcome improvements. =head1 BACKGROUND It is bad enough that Microsoft office documents frequently appear in my mailbox. They are not only harder to view than ordinary text/html/pdf files in almost any ordinary, standards-based mail readers, but these documents can also contain computer viruses and other problems. The fact that I am often expected by these senders to read the documents puts the burden on me; either to respond to them that I do not (want to) read their enclosures, or to scrounge around for a computer program that does allow me to read some subset of these documents and then hope that I read it all. An automatic response (such as the one sent by this program) is less likely to be perceived as offensive, especially if I have to respond to such emails when I am cranky. I really do not feel like writing a long response everytime explaining to the sender why he/she should write it again. Also, the sender may no longer be present when I respond. It is better for such responses to be mailed as soon as possible. In sum, I think the burden should be on the sender to provide appropriate document formats. If used by enough unix users, it might also help rid us of the plague of closed-format MS office documents. Imagine if 10% of the time when someone sends an MS office enclosure, an email comes back to this person telling him/her that the email is unreadable. Let these users complain to Microsoft! =head1 USAGE $ badenclosures minutes The argument is the number of minutes between checking the imap server. If there is no argument, the program runs once and then exits. messages that have elicited a response are saved in the .notagainfile file. it probably makes sense to delete it every once in a while; certainly once a month! we could take the liberty to delete it if there are no more unread messages, but it is sort of interesting to see who sent bad enclosures. Naturally, you can easily make this a daemon. =head1 Configuration File The configuration file is in $homedir/.badenclosures.config. Its format is simple: parameter value where parameter is one of emailaddress, signature, imaphost, imapuserid, imappassword, smtphost, smtpuserid, smtppassword. if the user does not have one, the program creates a template file and asks the user to edit it. Please do not run this program if you do not trust the privacy of your home directory. your home directory contains a file with your imap and smtp password UNENCYRPTED! =head1 DEPENDENCE please install the CPAN modules Mail::IMAPClient -- needed to read emails. Net::SMTP -- needed to send emails. =head1 NOTES This program could be an easy skeleton for a spam client, that pulls down particular kind of messages, checks them for spam, and then deletes it. I do not run this under a Windows system. This program may not work under non-linux systems. =head1 COPYRIGHT Copyright: Ivo Welch, 2003. Hereby released under the GPL. =head1 CHANGES 2003/04/12 released the first version to the public. 2003/04/15 added some more identifying enclosure types. =cut ################################################################ my $wait= $ARGV[0] || "-1"; ($wait =~ /[0-9]+/) or die "$0: Argument must be a 'number of minutes', or nothing.\n"; my $verbose= ($wait==(-1)) ? 10 : 0; my $testmode = 0; # if 1, sends the warning emails back to you, not to the sender. ################################################################ # read all the config file information, such as imap server, etc. ################################################################ our %config; readconfigfile(); ################################################################ # set the semaphore to avoid multiple processes ################################################################ (!(-e $semaphorefile)) or die "\n$0:\nSorry, but the file '$semaphorefile' already exists.\n". "This could indicate that this program is already running.\n". "If it is not, please delete this file and try again.\n\n"; open(SEM, ">$semaphorefile") or die "$0: cannot open semaphore file.\n"; (-e $semaphorefile) or die "$0: sorry, but I cannot set $semaphorefile!\n"; print SEM "# $0: $Version\n"; print SEM "# $0 with pid $$ started at ".time()." ".localtime()."\n"; print SEM "# this semaphore + log file will be deleted upon ^C\n\n"; if ($config{keepalog}) { # make writing unbuffered, so effects can be immediately seen my $old_fh= select(SEM); $|= 1; select($old_fh); warn "[$0: all output will be logged to $semaphorefile]\n"; } else { close(SEM) or warn "$0: strange, I cannot close the semaphore file!\n"; warn "[$0: no output logging!]\n"; } $SIG{INT} = sub { unlink($semaphorefile); die "$0: interrupted! cleaning up semaphore.\n"; }; ################################################################ # now, read the file with the list of messages that have already # received a complaint from us. PS: you should handdelete this # file every once in a while. ################################################################ my %alreadysent; { if (!defined(open(ALREADYSENT, $notagainfile))) { keeplog(0, "mild warning.\n\tI could not open the '$notagainfile' file on startup.\n\t". "I will be create it when I need it.\n"); } else { my @alreadysent=; close(ALREADYSENT); foreach my $m (@alreadysent) { $alreadysent{$m}= 1; } keeplog(1, "already sent ".keys(%alreadysent)." emails; avoiding resending to these."); } } ################################################################ # ready to check the Inbox on the imap server. ################################################################ my %alreadycheckedinthisrun; # this will store messages already checked; my $imapfailures=0; my $MAXIMAPFAILURES=120; # 2 hours for (my $loopcount=0; 1; ++$loopcount) { use Mail::IMAPClient; my $imap = Mail::IMAPClient->new; # returns an unconnected Mail::IMAPClient object: # intervening code using the 1st object, then: # (returns a new, authenticated Mail::IMAPClient object) $imap = Mail::IMAPClient->new( Server => $config{imaphost}, User => $config{imapuserid}, Password=> $config{imappassword}, Clear => 5, # Unnecessary since '5' is the default # ... # Other key=>value pairs go here ); if ( ! ($imap->select("Inbox") ) ) { if ($loopcount == 0) { keeplog(0, "the imap server or Inbox could not be opened ($imapfailures) upon startup."); die "$0: could not select Inbox: $@\n"; # covers previous trouble } ++$imapfailures; keeplog(0, "the imap server or Inbox could not be opened ($imapfailures)."); if ($imapfailures>$MAXIMAPFAILURES) { keeplog(0, "This is deemed hopeless now."); die "$0: could not select Inbox: $@\n"; # covers previous trouble } else { sleep($wait*60); # sleep last; # and try again } } # if you want to check reading: my $msgcount= $imap->message_count; print "Inbox has $msgcount messages.\n"; ################################################################ # figure out whether there are any unread messages in Inbox ################################################################ my @unread = $imap->unseen; if ($#unread<=0) { $imap->disconnect() or keeplog(0, "Could not disconnect: $@."); keeplog(1, "no unread messages; disconnected."); } else { keeplog(1, "You have ".($#unread+1)." unread messages: @unread."); ################################################################ # check through all unread and unchecked emails for unwanted enclosures. ################################################################ my @noneedmsg; foreach my $u (@unread) { my $uniqueid= permute($imap, $u); ($uniqueid ne "0") or next; if ($alreadycheckedinthisrun{$uniqueid}) { push(@noneedmsg, $u); next; } $alreadycheckedinthisrun{$uniqueid}=1; # we will not have to check this one again! chomp($uniqueid); if ($alreadysent{$uniqueid}) { keeplog(1, "although $uniqueid is an offender, we have already informed him."); next; } keeplog(1, "I have $uniqueid, which is new to me (not found it before)."); # read the email, but pretend it was not read; my $string = $imap->message_string($u) or die "$0: Could not read imap message: $@\n"; $imap->unset_flag("Seen",$u) or die "$0: Could not unset_flag: $@\n"; # # read the email, but pretend it was not read; my $isanavoid=""; foreach my $avoid (@avoidlist) { if ($string =~ /^\Content\-Type\: $avoid/m) { $isanavoid.= $avoid; } } if ($isanavoid =~ /[a-z]/) { $imap->mark($u) or keeplog(0, "I could not mark the message $u specially for you."); mailmessage($imap, $u, $isanavoid); } } ($#noneedmsg>=0) and keeplog(1, "unneeded to check on this loop: @noneedmsg."); $imap->disconnect or keeplog(0, "Could not disconnect from imap server: $@."); } # there were unread emails if ($wait<=0) { unlink($semaphorefile); keeplog(1, "Exiting, because I am not running as a daemon."); exit(0); } else { keeplog(2, "I am sleeping now for $wait minute(s)."); sleep($wait*60); # sleep minutes keeplog(2, "I am back awake."); } } # eternal loop ################################################################ # permute into a unique id for each message. we also save the # user id. if you want, you could use this information to send # email only once to each offender. ################################################################ sub permute { my ($imap, $u)= @_; my $msgid = $imap->message_uid($u); # get_header($u, "Messsage-ID") || "no msgid"; my $msgdate= $imap->date($u); my $msgfrom = $imap->get_header($u, "From"); return "$msgid . $msgfrom . $msgdate \n" } ################################################################ # we have detected an enclosure that we do not like. now we # have to do two things: store this as a problem email, and # then inform the mail sender. ################################################################ sub mailmessage { my ($imap, $u, $problem)= @_; my $subject = $imap->get_header($u, "Subject"); my $from = $imap->get_header($u, "From"); # my $msgid = $imap->get_header($u, "Messsage-ID") || "no msgid"; my $msgid = $imap->message_uid($u); # get_header($u, "Messsage-ID") || "no msgid"; my $msgdate= $imap->date($u); keeplog(1, "we are now mailing a warning to '$from' on message '$subject'."); my $uniqmsgid= permute($imap, $u); open(ALREADYSENT, ">> $notagainfile") or die "$0: cannot open the '$notagainfile' for a\n"; print ALREADYSENT $uniqmsgid; close(ALREADYSENT); $alreadysent{ $uniqmsgid } = 1; if ($testmode) { $from= $config{emailaddress}; } #send to yourself! debug only sendsmtpmail($from, $subject, $problem); } ################################################################ # the actual email sending via smtp; # this is how to test this: # first, set the DEBUG variable in new to 1. then run # sendsmtpmail("\"someone \"", "microsoft word"); ################################################################ sub sendsmtpmail { use Net::SMTP; my ($to, $psubject, $problem)= @_; chomp($psubject); # now do the actual mail sending! could use domain, such as 'Hello => 'yale.edu',' my $smtp= Net::SMTP->new($config{smtphost}, Timeout =>60, Debug=>0 ); ($smtp) or die "$0: I am unable to talk to the SMTP host.\n"; if (defined($config{smtppassword}) && ($config{smtppassword} =~ /[0-9a-zA-Z]/)) { $smtp->auth($config{smtpuserid}, $config{smtppassword}) or die "$0: I cannot authenticate '$config{smtpuserid}'\n"; } my $qtitle= substr($psubject,0,20); $smtp->mail($config{emailaddress}); $smtp->to($to); $smtp->data(); $smtp->datasend("To: $to\n"); $smtp->datasend("Subject: Your email '$qtitle' to $config{emailaddress}.\n"); $smtp->datasend("\n Your email (subject '$qtitle') has enclosed a document that is of type '$problem', most likely created by Microsoft Office. Please realize that MS Office enclosures are written in a binary closed format that is not standard. Microsoft has not disclosed the internal format of these binary files, and/or made universal readers available. Therefore, your enclosure cannot be reliably read on computers on which Microsoft Office is not installed. This includes not only many Windows and Mac machines, but almost all unix workstations (such as sun or linux workstations), because neither MS Office itself nor viewers for MS Office documents are available under unix. Therefore, because I am running linux, please realize that I cannot read your enclosure. If you want me to read your enclosure, then please resend your email in text, html, or pdf format. Microsoft Office applications have a 'Save As' option that should allow you to convert your document into either .html or .txt (ASCII) or .csv format. If you have Acrobat installed, you can also create and email me an acrobat .pdf file. Yes, I am sorry: I know this is a pain in the neck, but reading MS documents is more of a pain in the neck for me than it is for you to create a standard open document. Complain to Microsoft. An important side benefit is that text/html/pdf files are easier to view in any mail reader, and that they cannot include viruses or trojan horses. This can be a problem with Microsoft enclosures. Sincerely, $config{signature} PS: Your email was read (and this email was sent) by an automated mail reader called badenclosures.pl, freely available at http://welch.econ.brown.edu/computers/ . [please do not reply to this email itself, but resend your own document.] "); $smtp->quit() or warn "\n$0:\n failed to smtp. please set the debug variable in SMTP to 1, and look at why you failed.\n"; if ($config{notifyme}) { # now do the actual mail sending! $smtp= Net::SMTP->new($config{smtphost}, Debug=>0 ); ($smtp) or die "$0: I am unable to talk to the SMTP host.\n"; if (defined($config{smtppassword}) && ($config{smtppassword} =~ /[0-9a-zA-Z]/)) { $smtp->auth($config{smtpuserid}, $config{smtppassword}) or die "$0: I cannot authenticate '$config{smtpuserid}'\n"; } $smtp->mail($config{emailaddress}); $smtp->to($config{emailaddress}); $smtp->data(); $smtp->datasend("To: $config{emailaddress}\n"); $smtp->datasend("Subject: Sent Warning on '$qtitle' to $config{emailaddress}.\n"); my $hostname= `hostname`; $smtp->datasend("\n Notification: The program '$0' running on $hostname has sent an email to '$to', asking him/her to resend the email '$qtitle' without the MS enclosure. "); $smtp->quit() or warn "\n$0:\n failed to smtp the confirmation. please set the debug variable in SMTP to 1, and look at why you failed.\n"; } } ################################################################ sub readconfigfile { my @required = qw(emailaddress signature imaphost imapuserid imappassword smtphost smtpuserid smtppassword notifyme keepalog); if (!defined(open(PWD, $configfile))) { warn "\n$0:\nYou need to create a '$configfile' file ($!).\nI will now try to create a template file for you.\n\n"; open(PWD, ">$configfile") or die "$0: sorry, but I cannot create a template '$configfile' for you ($!). I am therefore out of options.\n"; print PWD " # this file contains the parameters for the badenclosures program . emailaddress yourid\@yourhost.domain signature yourname\\nProud user of 'badenclosures'!\\n imaphost imaphostname.domain imapuserid put-in-your-own imappassword put-in-your-own smtphost smtphostname.domain smtpuserid put-in-your-own smtppassword put-in-your-own notifyme 1 keepalog 1 "; close(PWD); die "$0: I have successfully created the template file '$configfile' for you.\nNow edit it, please.\n\n"; } while () { ($_ !~ /^[ ]*$/) or next; # ignore empty lines ($_ !~ /^\#/) or next; # ignore comment lines my ($key, $val) = split(/\t/, $_); $val=~ s/\\n/\n/g; # new lines in the signature file chomp($val); $config{$key}= $val; } close(PWD); foreach my $v (@required) { (defined($config{$v})) or die "$0: the key '$v' was not defined in your config file!\n"; } # any other keys are ignored. you could check for which ones are there that should not be there. } ################################################################ sub keeplog { my ($importance, $msg)= @_; my $output= "$0:".localtime().":\t$msg\n"; ($verbose) and print STDERR $output; ($config{keepalog}) and print SEM $output; }