#!/usr/bin/env python
# getbib by kkumer@phy.hr
# v. 1.5 2007-08-24
# Gets BibTeX info for an article from SPIRES database
# Usage:
#   getbib quant-ph/0012149
#   getbib xxxxxxx  (where xxxxxx is the SPIRES key, which can be
#       found by looking there at links on keywords, bibtex's etc.)
#       This is useful for older papers which are not on arXiv.org!

##########################################
# User modifiable options:

# SPIRES server:
#server="www.slac.stanford.edu"
server = "www-library.desy.de"

# path to the SPIRES interface:
# (this first value used to be assigned and then immediately overwritten by
# the next line, so it never had any effect; kept as a commented alternative)
#path="/spires/find/hep/www"
path = "/cgi-bin/spiface/find/hep/www"

# CGI commands (%s stands for ID that will be substituted)
#cgibibtex="?rawcmd=fin+eprint+%s&FORMAT=WWWBRIEFBIBTEX"
cgibibtex = "?key=%s&FORMAT=WWWBRIEFBIBTEX"
cgisummary = "?rawcmd=fin+eprint+%s&FORMAT=WWW"
cgikeywords = "topics?key=%s"

##########################################

try:
    import httplib  # Python 2 name
except ImportError:
    import http.client as httplib  # same module under its Python 3 name
try:
    import commands  # Python 2 only; unused below, kept from the original import list
except ImportError:
    commands = None
import re
import string  # unused below; kept from the original import list
import sys
import time  # unused below; kept from the original import list


class hep:
    """Fetch one page from the SPIRES web interface.

    The request URL is ``path + command % paper_id`` on ``server`` (both
    module-level options).  On success the instance carries:

    errcode -- HTTP status code (always 200 here)
    errmsg  -- HTTP reason phrase
    headers -- response headers object
    data    -- response body as text

    Any status other than 200 prints a diagnostic and exits the process with
    status 1.
    """

    def __init__(self, command, paper_id):
        conn = httplib.HTTPConnection(server)
        try:
            # servers insist on a User-Agent header
            conn.request(
                'GET', path + command % paper_id,
                headers={'User-Agent': 'getbib/1.0beta by kkumer[at]phy.hr'})
            reply = conn.getresponse()
            self.errcode = reply.status
            self.errmsg = reply.reason
            self.headers = reply.msg
            if self.errcode != 200:
                sys.stdout.write("Errcode: " + str(self.errcode) + "\n")
                sys.stdout.write("Server error!\n")
                sys.exit(1)
            body = reply.read()
        finally:
            # Release the socket on every path, including the sys.exit above.
            conn.close()
        if not isinstance(body, str):
            # Python 3 hands back bytes; latin-1 maps every byte to a
            # character, so decoding cannot fail on unexpected input.
            body = body.decode('latin-1')
        self.data = body


def extract_key(summary_page):
    """Return the paper "key" ID found in a summary page, or None."""
    found = re.search(r'www\?key=(.*?)&', summary_page)
    if found is None:
        return None
    return found.group(1)


def extract_bibtex(bibtex_page):
    """Return the '@Article ... }' entry found in a page, or None."""
    found = re.search('@Article.*\n}\n', bibtex_page, re.DOTALL)
    if found is None:
        return None
    return found.group()


def extract_keywords(keywords_page):
    """Return the cleaned keyword strings linked from a keywords page.

    The previous pattern ended in a lazy ``(.*?)`` with nothing after it,
    which always matches the empty string, so every keyword came out blank.
    The text is now captured up to the next tag.
    NOTE(review): assumes each keyword is the link text directly following
    the ``www?dk=...`` link's closing '>' -- confirm against a live page.
    """
    raw = re.findall(r'www\?dk=[^>]*>([^<]*)', keywords_page)
    cleaned = [re.sub('\n', ' ', text).strip() for text in raw]
    # An empty link text would only produce a useless  keywords = ""  field.
    return [kw for kw in cleaned if kw]


def build_entry(bibtex, keywords, keyid, paper_id):
    """Return the BibTeX entry extended with the extra getbib fields.

    The closing '"\\n}\\n' of the entry is reopened, one ``keywords`` field is
    added per keyword, followed by ``spireskey``, ``pdf``, ``group`` and
    ``comments``, and the entry is closed again.  ``paper_id`` is the
    command-line ID (with '/' possibly already encoded as '%2F'); it names
    the pdf file with '-' in place of the slash.
    """
    kwlist = [' keywords = "%s",\n' % kw for kw in keywords]
    # Plain str.replace instead of re.sub: in a regex replacement string a
    # backslash inside a keyword would be treated as an escape sequence.
    # The fields are joined with '' because string.join's default separator
    # is a single space, which put a stray space before every field after
    # the first.
    entry = bibtex.replace('"\n}\n', '",\n' + ''.join(kwlist))
    entry = entry + ' spireskey = "%s",\n' % keyid
    pdf_name = (paper_id + '.pdf').replace('%2F', '-')
    entry = entry + ' pdf = "%s",\n group = "",\n comments = ""\n}\n' % pdf_name
    return entry


def main(argv=None):
    """Run getbib for the ID given on the command line and print the entry."""
    if argv is None:
        argv = sys.argv

    # Checking input
    if len(argv) != 2:
        sys.stdout.write('\nUsage: getbib archive/number\n\n')
        sys.exit(1)
    paper_id = argv[1]

    # if hep number given on command line:
    # get summary page and extract paper "key" ID from there
    if re.search('[/.]', paper_id):
        paper_id = paper_id.replace('/', '%2F')
        keyid = extract_key(hep(cgisummary, paper_id).data)
        if keyid is None:
            sys.stderr.write(
                'ERROR: No page. Are you sure paper %s exists?\n'
                % paper_id.replace('%2F', '/'))
            sys.exit(1)
    # Otherwise expect that argument given is keyid itself
    else:
        keyid = paper_id

    # Getting BibTeX entry
    bibtex = extract_bibtex(hep(cgibibtex, keyid).data)
    if bibtex is None:
        sys.stderr.write(
            'ERROR: No BibTeX. Are you sure paper %s exists?\n' % keyid)
        sys.exit(1)

    # Getting keywords
    keywords = extract_keywords(hep(cgikeywords, keyid).data)

    # appending keywords, SPIRES key id, pdf, group and comments fields
    sys.stdout.write(build_entry(bibtex, keywords, keyid, paper_id) + '\n')


if __name__ == '__main__':
    main()