#!/usr/bin/env python
# hep_filter - scoring/sorting daily abstract announcements by arXiv.org
# kkumer@phy.hr 2007-08-24
# Version: 0.6
"""Score and sort the abstracts of an arXiv announcement mail, then remail it.

The mail is read from stdin.  Every abstract found in it is matched against
the patterns of the scores file: each match is marked as *match* and adds the
pattern's score to the abstract's total.  The abstracts are then remailed
sorted by total score, highest first.  A mail in which fewer than three
abstracts are found is remailed with its original content plus a notice.

The code runs unchanged on Python 2.6+ and Python 3.
"""

import re
import smtplib
import string  # no longer used below; import kept so nothing relying on it breaks
import sys

# Configure this to match your setup
sfile = "/home/kkumer/.procmail/hep_scores.rc"  # scores file
fromaddr = "hep_filter@mydomain.com"  # "From" field of processed email
# There is no need to create real user account 'hep_filter'.
# This is how procmail recognizes that email is already hep_filtered.
toaddrs = "myself@mydomain.com"  # "To" field, your email probably
mailserver = "localhost"  # Your SMTP server machine
# If you need login and password for some remote host, see _send() near
# the end of this script
# End of user configurable part

# Trailer pieces appended to every remailed message.
_RULER = "\n" + 78 * "-" + "\n"
_CONTACT = "For problems with hep_filter contact kkumer@phy.hr"
_NOT_ABSTRACTS = "hep_filter decided that this is not a mail with abstracts!\n"

# Headers are everything up to the first blank line.
_HEADERS_RE = re.compile(r'.*?^$', re.S | re.M)
_SUBJECT_RE = re.compile(r'^Subject: .*?$', re.M)
# One abstract: a (possibly empty) line of dashes, a line holding two
# backslashes, then everything from "arXiv" up to the closing
# "( http://arxiv.org ... kb)" link.
_ABSTRACT_RE = re.compile(
    r'^-*\n\\\\\narXiv.*?\( http://arxiv.org.*?kb\)', re.S | re.M)
_TITLE_RE = re.compile(r'(^Title: .*$)', re.M)


def boldify(mo):
    """boldify(MatchObject): Simple boldifying string -> *string*"""
    return "*" + mo.group() + "*"


def scoreAbstract(ab, scores):
    """Boldify pattern matches in the abstract and calculate its total score.

    ab     -- text of one abstract
    scores -- iterable of (pattern, score) pairs; pattern is a regular
              expression matched case-insensitively, score is anything
              int() accepts (int() raises ValueError otherwise)

    Patterns are applied one after another, so a later pattern sees the
    asterisks inserted for earlier ones.  Every match adds the pattern's
    score once.  Finally a "Score: N" line is inserted above each line that
    starts with "Title: ".

    Returns 2-tuple (boldified abstract, score).
    """
    score = 0
    # Going over score patterns, boldifying and adding scores
    for scorePat in scores:
        ab, noMatches = re.compile(scorePat[0], re.I).subn(boldify, ab)
        score += noMatches * int(scorePat[1])
    # Adding line "Score: " to mail
    ab = _TITLE_RE.sub("Score: " + str(score) + "\n\\1", ab)
    return (ab, score)


def abSort(ab1, ab2):
    """Old-style comparison function ordering 2-tuples by descending score.

    Compares the second elements: returns -1 if ab1 has the higher score,
    0 if the scores are equal and 1 otherwise.  list.sort() accepts such a
    function only on Python 2, so the script itself sorts with key/reverse;
    the function is kept for code that still calls it.
    """
    if ab1[1] > ab2[1]:
        return -1
    elif ab1[1] == ab2[1]:
        return 0
    else:
        return 1


def _asText(data):
    """Return data as native str; bytes are decoded as latin-1 (lossless)."""
    if isinstance(data, str):
        return data
    return data.decode("latin-1")


def _asBytes(text):
    """Return text as bytes, undoing the latin-1 decoding of _asText()."""
    if isinstance(text, bytes):
        return text
    return text.encode("latin-1")


def _readMail():
    """Read the whole mail message from stdin as byte-transparent text."""
    # Python 3 exposes the raw byte stream as sys.stdin.buffer; reading it
    # avoids decode errors on mails that are not in the locale's encoding.
    stream = getattr(sys.stdin, "buffer", sys.stdin)
    return _asText(stream.read())


def _readScores(path):
    """Read the scores file into a list of tuples (pattern, score).

    Only lines containing '->' are parsed.  Every '%' is a wildcard that is
    replaced by the regular expression \\S*.  The fields around '->' are
    stripped of surrounding whitespace and kept as strings.
    """
    scores = []
    # Binary mode: lines are split on "\n" only and no byte can fail to
    # decode; the file is closed even if parsing raises.
    with open(path, "rb") as scoreFile:
        for rawLine in scoreFile:
            scoreLine = _asText(rawLine)
            if "->" not in scoreLine:
                continue
            # wildcard processing
            scoreLine = scoreLine.replace("%", "\\S*")
            scores.append(
                tuple(field.strip() for field in scoreLine.split("->")))
    return scores


def _extractSubject(mail):
    """Return the "Subject: ..." header line of mail for later remailing."""
    headerMatch = _HEADERS_RE.match(mail)
    # Without a blank line there is no header/body split; search everything.
    headers = headerMatch.group() if headerMatch else mail
    subjectMatch = _SUBJECT_RE.search(headers)
    if subjectMatch is None:
        return "Subject: (no subject)"
    return subjectMatch.group()


def _buildDigest(mail, abstracts, scores):
    """Return the message holding all abstracts, highest score first."""
    absScored = [scoreAbstract(ab, scores) for ab in abstracts]
    # Stable sort: abstracts with equal scores keep their original order,
    # exactly as sorting with abSort did on Python 2.
    absScored.sort(key=lambda scored: scored[1], reverse=True)
    parts = ["From: " + fromaddr + "\nTo: " + toaddrs + "\n"
             + _extractSubject(mail) + "\n\n"]
    parts.extend(scored[0] + "\n" for scored in absScored)
    parts.append(_RULER)
    parts.append(_CONTACT)
    return "".join(parts)


def _passThrough(mail):
    """Return the original mail with a notice that it was left unprocessed."""
    # NOTE(review): the mail keeps its own "From:" header here; confirm that
    # the procmail recipe does not feed it back into hep_filter.
    return mail + _RULER + _NOT_ABSTRACTS + _CONTACT


def _send(msg):
    """Remail msg from fromaddr to toaddrs through mailserver."""
    server = smtplib.SMTP(mailserver)
    try:
        # server.login("username", "password")
        # This should work but I haven't tried
        # Bytes are sent so that non-ASCII mail also works on Python 3.
        server.sendmail(fromaddr, toaddrs, _asBytes(msg))
    finally:
        # Close the SMTP session even when sending fails.
        server.quit()


def main():
    """Read one mail from stdin, score and sort its abstracts, and remail it."""
    mail = _readMail()
    # creating list of abstracts
    abstracts = _ABSTRACT_RE.findall(mail)
    # Now, we should check that this is really a list of abstracts and not
    # some administrative email which should be forwarded untouched!
    if len(abstracts) < 3:
        # less than 3 abstracts probably means there is none
        msg = _passThrough(mail)
    else:
        msg = _buildDigest(mail, abstracts, _readScores(sfile))
    # For debugging:
    ## print(msg)
    _send(msg)


if __name__ == "__main__":
    main()