#!/usr/bin/env python

# hep_filter - scoring/sorting daily abstract announcements by arXiv.org
# kkumer@phy.hr 2007-08-24 
# Version: 0.6 

import string,re,sys,smtplib

# Configure this to match your setup
#
# sfile: path of the scores file.  Only lines containing '->' are used;
#    each has the form "pattern -> score".  The pattern is applied as a
#    case-insensitive regular expression, '%' in it is a wildcard for a run
#    of non-whitespace characters, and the score must be an integer.
sfile = "/home/kkumer/.procmail/hep_scores.rc"   # scores file
fromaddr = "hep_filter@mydomain.com"  # "From" field of processed email
   # There is no need to create a real user account 'hep_filter'.
   # This is how procmail recognizes that an email is already hep_filtered.
toaddrs = "myself@mydomain.com"   # "To" field, your email probably
mailserver = "localhost"   # Your SMTP server machine
   # If you need a login and password for some remote host, see the
   # commented-out server.login() call at the end of this script.
# End of user configurable part

def boldify(mo):
    """boldify(MatchObject): Wraps the whole matched text in asterisks,
       i.e. string -> *string*.  Meant to be used as the replacement
       callback of re.sub()/re.subn()."""
    matched = mo.group(0)
    return "".join(("*", matched, "*"))

def scoreAbstract(ab, scores):
    """ scoreAbstract(ab, scores): Scores one abstract.
        'scores' is a sequence of (pattern, score) entries.  Every
        case-insensitive match of a pattern is boldified and adds
        int(score) to the total.  A "Score: <total>" line is then put
        in front of each line starting with "Title: ".
        Returns 2-tuple (boldified abstract, total score)."""
    total = 0
    for entry in scores:
        # Patterns are applied one after another, so later patterns see
        # the text as already boldified by the earlier ones.
        pattern = re.compile(entry[0], re.I)
        ab, hits = pattern.subn(boldify, ab)
        total += hits * int(entry[1])
    # Adding line "Score: <score>" to mail, just above the title line
    titleLine = re.compile(r'(^Title: .*$)', re.M)
    ab = titleLine.sub("Score: " + str(total) + "\n\\1", ab)
    return (ab, total)

def abSort(ab1, ab2):
    """ abSort(ab1, ab2): Comparison function for sorting 2-tuples by the
        score in their second element, highest score first.
        Returns -1, 0 or 1 in the usual cmp() convention. """
    first, second = ab1[1], ab2[1]
    if first > second:
        return -1
    if first == second:
        return 0
    return 1


# Main script: reads one mail message from stdin, and either
#   - forwards it untouched (plus a short note) when it does not look like
#     a list of abstracts, or
#   - scores every abstract against the scores file, sorts the abstracts by
#     descending score and remails the sorted list.
# The "is this a list of abstracts?" check is done first, so that header
# parsing or a missing scores file cannot prevent an administrative mail
# from being forwarded.

# less than 3 abstracts probably means there is none
MIN_ABSTRACTS = 3

# reading mail message from stdin
mail = sys.stdin.read()

# creating list of abstracts
# (named 'abstracts' so that the builtin abs() is not shadowed)
pat = re.compile(r'^-*\n\\\\\narXiv.*?\( http://arxiv.org.*?kb\)', re.S|re.M)
abstracts = pat.findall(mail)

if len(abstracts) < MIN_ABSTRACTS:
    # Not really a list of abstracts but some administrative email:
    # forward it untouched, with an explanatory footer.
    msg = mail
    msg = msg + "\n"+78*"-"+"\n"
    msg = msg + "hep_filter decided that this is not a mail with abstracts!\n"
    msg = msg + "For problems with hep_filter contact kkumer@phy.hr"
else:
    # headers are everything up to the first blank line
    headerMatch = re.compile(r'.*?^$', re.S|re.M).match(mail)
    if headerMatch:
        headers = headerMatch.group()
    else:
        # no blank line found: search the whole mail for the subject
        headers = mail
    # extracting subject for later remailing
    subjectMatch = re.compile(r'^Subject: .*?$', re.M).search(headers)
    if subjectMatch:
        subject = subjectMatch.group()
    else:
        # a missing Subject header should not abort the remailing
        subject = "Subject: (no subject)"

    # reading scoring file into list 'scores' of tuples (pattern, score)
    scores = []
    scoreFile = open(sfile, "r")
    try:
        for scoreLine in scoreFile:
            # only lines with '->' are parsed
            if "->" in scoreLine:
                # wildcard processing: '%' becomes the regex \S*
                scoreLine = scoreLine.replace("%", "\\S*")
                scores.append(
                    tuple([part.strip() for part in scoreLine.split("->")]))
    finally:
        # try/finally rather than 'with' so that old Pythons still work
        scoreFile.close()

    # create list of scoreAbstract 2-tuples (abstract, score)
    absScored = [scoreAbstract(ab, scores) for ab in abstracts]

    # sorting by score, highest first.  The sort is stable, so abstracts
    # with equal scores keep their original order, exactly as with the
    # abSort comparison function.  The key= form works on Python 2.4+ and 3.
    absScored.sort(key=lambda scored: scored[1], reverse=True)

    # creating message for remailing
    parts = ["From: "+fromaddr+"\nTo: "+toaddrs+"\n"+subject+"\n\n"]
    for scored in absScored:
        parts.append(scored[0] + "\n")
    parts.append("\n"+78*"-"+"\n")
    parts.append("For problems with hep_filter contact kkumer@phy.hr")
    msg = "".join(parts)

# For debugging, print msg here instead of sending it.

# remailing
server = smtplib.SMTP(mailserver)
# server.login("username", "password")  # This should work but I haven't tried
server.sendmail(fromaddr, toaddrs, msg)
server.quit()
