#!/usr/bin/env python
# gethep by kkumer@phy.hr
# $Id: gethep,v 1.1.1.1 2003/01/30 16:52:56 kkumer Exp $
# Version $Name:  $
#
# Downloads a preprint from the configured server and either saves it in
# the current directory or shows it in an external viewer.
#
# Usage:
#   gethep -s quant-ph/0012149  -> saves quant-ph-0012149.pdf (or .ps.gz,
#                                  depending on `format` below) [default]
#   gethep -d quant-ph/0012149  -> displays it on screen
#
# The code deliberately sticks to constructs that behave the same on
# Python 2.6+ and Python 3 (e.g. print() is always called with exactly one
# argument, so no __future__ import is needed).

import os
import re
import shlex
import subprocess
import sys
import time

try:  # Python 3
    import http.client as httplib
except ImportError:  # Python 2
    import httplib

##########################################
#  User modifiable options:

# preprint server:
server = "it.arXiv.org"

# preferred file format ("pdf" or "ps"):
format = "pdf"

# PS resolution (don't comment this out, it is irrelevant for pdf)
res = '?dpi=600&font=bitmapped'

# program for displaying ("acroread", "gv", whatever you use);
# it may include command-line options, the file name is appended to it
disprog = "acroread"

# directory for temporary files
tmpdir = '/tmp'

# how long to wait for file to get created (in seconds)
# this will increase by 10 secs for each new try
waittime = 5

##########################################

# filename extension used when saving, per file format
_EXTENSIONS = {"ps": ".ps.gz", "pdf": ".pdf"}

# None for an unknown format; main() rejects that before any download
extension = _EXTENSIONS.get(format)

# A quoted link to a cached PostScript file.  [^"]* (instead of a greedy .*)
# keeps two links that share one line of HTML from merging into one match.
_PS_URL = re.compile(r'"(/PS_cache/[^"]*\.ps\.gz)"')

# The main file of an old-style paper is named by digits only, e.g.
# 9301001.ps.gz; other names are not treated as the main file.
_MAIN_FILE = re.compile(r'\d*\.ps\.gz')


def to_text(raw):
    """Return raw as a native string (bytes are decoded as latin-1)."""
    if isinstance(raw, str):
        return raw
    return raw.decode("latin-1")


def find_ps_urls(page):
    """Return the /PS_cache/...ps.gz URLs quoted in page, in page order.

    page is the downloaded listing page, as bytes or str.  The surrounding
    quotation marks are not part of the returned URLs.
    """
    return _PS_URL.findall(to_text(page))


def is_main_file(filename):
    """Tell whether filename looks like the main file of an old-style paper."""
    return _MAIN_FILE.match(filename) is not None


def parse_args(argv):
    """Interpret the command line.

    argv is the full argument vector, program name included.  Returns
    (paper_id, display) where display is true only for the "-d" flag, or
    None when the usage text should be shown instead.
    """
    if len(argv) == 3:
        # any flag other than -d means "save", as before
        return argv[2], argv[1] == "-d"
    if len(argv) == 2 and argv[1] not in ("-s", "-d"):
        return argv[1], False
    return None


class hep(object):
    """A paper downloaded from the server in the preferred format.

    After construction `filename` holds the suggested local file name,
    `data` the raw response body and `errcode`, `errmsg`, `headers` the
    HTTP status, reason and headers of the reply.
    """

    # servers insist on a User-Agent header
    agent = 'gethep/1.0 by kkumer[at]phy.hr'

    def __init__(self, paper_id):
        self.filename = paper_id.replace('/', '-') + extension
        self._fetch('/' + format + '/' + paper_id + res)

    def _fetch(self, path):
        """GET path from the server and store the reply on self.

        Prints the status code and exits the program with status 1 when
        the server does not answer with 200.
        """
        conn = httplib.HTTPConnection(server)
        try:
            conn.request('GET', path, headers={'User-Agent': self.agent})
            reply = conn.getresponse()
            self.errcode = reply.status
            self.errmsg = reply.reason
            self.headers = reply.msg
            if self.errcode != 200:
                print("Errcode: " + str(self.errcode))
                print("Server error!")
                sys.exit(1)
            # Getting data (gzipped paper or "invalid id" or "wait for PS")
            self.data = reply.read()
        finally:
            conn.close()

    def save(self, filename):
        """Write the downloaded data to filename."""
        # the payload is gzip or PDF data, so it must be written in binary
        with open(filename, "wb") as out:
            out.write(self.data)

    def show(self, filename):
        """Run the configured viewer on filename and print its output."""
        # An argument list (no shell) keeps a hostile file name from being
        # interpreted as shell syntax; file names of old-style papers come
        # from the server's HTML.
        command = shlex.split(disprog) + [filename]
        try:
            viewer = subprocess.Popen(command, stdout=subprocess.PIPE,
                                      stderr=subprocess.STDOUT)
        except OSError as err:
            print("Cannot run " + disprog + ": " + str(err))
            return
        output = to_text(viewer.communicate()[0])
        if output.endswith("\n"):
            output = output[:-1]
        print(output)


class oldstyle(hep):
    """One file of an old-style paper, downloaded from an explicit URL path."""

    agent = 'gethep/1.0beta by kkumer[at]phy.hr'

    def __init__(self, url):
        # taking the last part of url as the filename
        self.filename = url.split('/')[-1]
        self._fetch(url)


def _usage():
    """Print the usage text."""
    print('')
    print('==== gethep 1.0 by kkumer@phy.hr 2003-01-30 ====')
    print('')
    print('Usage: gethep [-s|-d] archive/number (e.g. hep-ph/0101001)')
    print('-s save to file [default]')
    print('-d display in viewer')
    print('')


def _download(paper_id):
    """Download paper_id, retrying while the server is still creating it.

    Returns the hep object, or None after reporting a problem that no
    retry can fix.
    """
    wait = waittime
    while True:
        paper = hep(paper_id)
        if b"Invalid paper" in paper.data:
            print("Paper " + paper_id + " is invalid!")
            return None
        if b"ostscript unavailable" in paper.data:
            print("Postscript for paper " + paper_id + " is unavailable!")
            print("This is most probably due to some mistake by authors.")
            print("Maybe this is PDF-only paper.")
            return None
        if b"PDF unavailable" in paper.data:
            print("PDF for paper " + paper_id + " is unavailable!")
            print("This is most probably due to some mistake by authors.")
            return None
        if b"automatically create" not in paper.data:
            return paper
        print("waiting " + str(wait) + " seconds for file to get created")
        time.sleep(wait)
        wait = wait + 10


def _fetch_old_style(page, display):
    """Save every file of an old-style paper; show the main one on request."""
    print("This is the old-style paper with detached figures.")
    print("""I'll try to download everything (PS), and I'll save it regardless of the -d/-s flags.""")
    parts = []
    for url in find_ps_urls(page):
        part = oldstyle(url)
        part.save(part.filename)
        parts.append(part)
    print(str(len(parts)) + " files downloaded.")
    if display:
        # displaying the main file only
        for part in parts:
            if is_main_file(part.filename):
                part.show(part.filename)


def main(argv=None):
    """Run gethep; returns the process exit status (0 on success)."""
    if argv is None:
        argv = sys.argv
    if extension is None:
        print("Unknown format: " + format)
        return 1
    request = parse_args(argv)
    if request is None:
        _usage()
        return 1
    paper_id, display = request

    paper = _download(paper_id)
    if paper is None:
        return 1

    # old-style papers with detached figures (PS only)
    if b"following files" in paper.data:
        _fetch_old_style(paper.data, display)
        return 0

    if display:
        print("Displaying " + paper.filename)
        temp_path = os.path.join(tmpdir, paper.filename)
        paper.save(temp_path)
        try:
            paper.show(temp_path)
        finally:
            # best-effort cleanup; a failure to delete was ignored before too
            try:
                os.remove(temp_path)
            except OSError:
                pass
    else:
        print("Saving " + paper.filename)
        paper.save(paper.filename)
    return 0


if __name__ == "__main__":
    sys.exit(main())