Arxiv.py: Difference between revisions
Jump to navigation
Jump to search
No edit summary |
(accommodate new arXiv five digit format) |
||
(3 intermediate revisions by 2 users not shown) | |||
Line 1: | Line 1: | ||
Copy the following text into a file called arxiv.py | Copy the following text into a file called arxiv.py and save it. | ||
Further usage instructions are available at [[ArXiv_script]]. | |||
<pre> | <pre> | ||
#! /usr/bin/python | #! /usr/bin/python | ||
## arXiv script version 0. | ## arXiv script version 0.3 | ||
## Copyright | ## Copyright 2015 Tom Brown | ||
## This program is free software; you can redistribute it and/or | ## This program is free software; you can redistribute it and/or | ||
Line 48: | Line 50: | ||
''' | ''' | ||
__version__ = "0. | __version__ = "0.3" | ||
__author__ = "Tom Brown" | __author__ = "Tom Brown" | ||
__copyright__ = "Copyright | __copyright__ = "Copyright 2015 Tom Brown, GNU GPL 3" | ||
import sys, os, getopt, re, urllib | import sys, os, getopt, re, urllib, gzip | ||
def findRefType(ref): | def findRefType(ref): | ||
ref = ref.replace('arxiv:','') | ref = ref.replace('arxiv:','') | ||
if re.search(r'^[a-zA-Z\-]+/\d{7}$',ref): | if re.search(r'^[a-zA-Z\-\.]+/\d{7}$',ref): | ||
type = 'old-style eprint' | type = 'old-style eprint' | ||
elif re.search(r'^\d{7}$',ref): | elif re.search(r'^\d{7}$',ref): | ||
type = 'old-style eprint' | type = 'old-style eprint' | ||
ref = 'hep-th/' + ref | ref = 'hep-th/' + ref | ||
elif re.search('^\d{4}\.\d{4}$',ref): | elif re.search('^\d{4}\.\d{4,5}$',ref): | ||
type = 'new-style eprint' | type = 'new-style eprint' | ||
else: | else: | ||
Line 83: | Line 85: | ||
def downloadPS(ref,type,downloadPath): | def downloadPS(ref,type,downloadPath): | ||
downloadPath = os.path.expanduser(downloadPath) | downloadPath = os.path.expanduser(downloadPath) | ||
filename = downloadPath + ref.replace('/','-') | |||
urllib.urlretrieve('http://arxiv.org/ps/' + ref, filename) | |||
gzipFile = gzip.GzipFile(filename) | |||
psFile = open(filename + ".ps","w") | |||
psFile.write(gzipFile.read()) | |||
psFile.close() | |||
gzipFile.close() | |||
os.remove(filename) | |||
def downloadSource(ref,type,downloadPath): | def downloadSource(ref,type,downloadPath): | ||
downloadPath = os.path.expanduser(downloadPath) | downloadPath = os.path.expanduser(downloadPath) | ||
filename = downloadPath + ref.replace('/','-') | |||
urllib.urlretrieve('http://arxiv.org/e-print/' + ref, filename + ".dum") | |||
gzipFile = gzip.GzipFile(filename + ".dum") | |||
sourceFile = open(filename,"w") | |||
sourceFile.write(gzipFile.read()) | |||
sourceFile.close() | |||
gzipFile.close() | |||
os.remove(filename + ".dum") | |||
Line 119: | Line 127: | ||
def getComments(html): | def getComments(html): | ||
if | if "comments" not in html: | ||
return "no comments" | return "no comments" | ||
else: | else: | ||
Line 128: | Line 136: | ||
def getJref(html): | def getJref(html): | ||
if | if "jref" not in html: | ||
return "no journal reference" | return "no journal reference" | ||
else: | else: | ||
Line 206: | Line 214: | ||
type, ref = findRefType(ref) | type, ref = findRefType(ref) | ||
print "Reference",ref,"is of type",type | |||
if type=="not arXiv": | if type=="not arXiv": | ||
Line 211: | Line 222: | ||
sys.exit(0) | sys.exit(0) | ||
if | if authorOpt+titleOpt+abstractOpt+commentsOpt+jrefOpt > 0: | ||
htmlObject = urllib.urlopen('http://arxiv.org/abs/' + ref) | htmlObject = urllib.urlopen('http://arxiv.org/abs/' + ref) | ||
html = htmlObject.read() | html = htmlObject.read() |
Latest revision as of 15:14, 16 August 2015
Copy the following text into a file called arxiv.py and save it.
Further usage instructions are available at ArXiv_script.
#! /usr/bin/python

## arXiv script version 0.3

## Copyright 2015 Tom Brown

## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 3 of the
## License, or (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.

## See http://www.stringwiki.org/wiki/ArXiv_script for more usage
## instructions

'''arXiv script

Usage:
  python arxiv.py reference [ -htabcjdps ] [ --help ]

"reference" must be a standard arXiv reference, e.g. hep-th/9711200, 0705.0303.

Options:
  -h, --help
    displays this help message
  -t
    displays the title
  -a
    displays the author(s)
  -b
    displays the aBstract
  -c
    displays the comments
  -j
    displays the journal reference
  -d
    downloads the PDF
  -p
    downloads the PS
  -s
    downloads the source file
'''

__version__ = "0.3"
__author__ = "Tom Brown"
__copyright__ = "Copyright 2015 Tom Brown, GNU GPL 3"

import getopt
import gzip
import os
import re
import shutil
import sys
import urllib

try:
    # Python 3 keeps urlopen/urlretrieve in the urllib.request submodule.
    import urllib.request as _urlreq
except ImportError:
    # Python 2 exposes them directly on the urllib module.
    _urlreq = urllib

# Reference classifications returned by findRefType().
_OLD_STYLE = 'old-style eprint'
_NEW_STYLE = 'new-style eprint'
_NOT_ARXIV = 'not arXiv'

# The canonical prefix is spelled "arXiv:", so it has to be matched
# case-insensitively; a plain replace('arxiv:', '') misses it.
_ARXIV_PREFIX = re.compile(r'arxiv:', re.IGNORECASE)


def findRefType(ref):
    '''Classify an arXiv reference and normalise it.

    Any "arxiv:" prefix (in any letter case) is removed first.  A bare
    seven-digit number is treated as an old-style reference and gets the
    "hep-th/" archive prepended.

    Returns a (type, ref) tuple where type is 'old-style eprint',
    'new-style eprint' or 'not arXiv', and ref is the normalised reference.
    '''
    ref = _ARXIV_PREFIX.sub('', ref)
    if re.search(r'^[a-zA-Z\-\.]+/\d{7}$', ref):
        return _OLD_STYLE, ref
    if re.search(r'^\d{7}$', ref):
        return _OLD_STYLE, 'hep-th/' + ref
    # Four or five digits after the dot (the five-digit form is newer).
    if re.search(r'^\d{4}\.\d{4,5}$', ref):
        return _NEW_STYLE, ref
    return _NOT_ARXIV, ref


def _targetPath(ref, downloadPath):
    '''Return the local file stem for ref inside downloadPath.

    "~" in downloadPath is expanded, and the "/" of an old-style reference
    is replaced by "-" so the reference is usable as a single file name.
    os.path.join supplies a separator when downloadPath lacks a trailing
    one; an empty downloadPath still means the current directory.
    '''
    return os.path.join(os.path.expanduser(downloadPath),
                        ref.replace('/', '-'))


def _gunzip(source, destination):
    '''Decompress the gzip file source into destination.

    The output is opened in binary mode because the payload is arbitrary
    bytes, and both handles are closed even if decompression fails.
    '''
    with gzip.open(source, 'rb') as compressed:
        with open(destination, 'wb') as plain:
            shutil.copyfileobj(compressed, plain)


def downloadPDF(ref, type, downloadPath):
    '''Download the PDF for ref to "<downloadPath><ref>.pdf".

    Nothing is downloaded unless type is one of the two eprint types
    returned by findRefType().
    '''
    if type in (_OLD_STYLE, _NEW_STYLE):
        _urlreq.urlretrieve('http://arxiv.org/pdf/' + ref,
                            _targetPath(ref, downloadPath) + '.pdf')


def downloadPS(ref, type, downloadPath):
    '''Download the gzipped PostScript for ref and unpack it to "<ref>.ps".

    type is not used; it is kept so that all download functions share one
    signature.  The compressed download is removed afterwards, also when
    unpacking raises.
    '''
    stem = _targetPath(ref, downloadPath)
    _urlreq.urlretrieve('http://arxiv.org/ps/' + ref, stem)
    try:
        _gunzip(stem, stem + ".ps")
    finally:
        os.remove(stem)


def downloadSource(ref, type, downloadPath):
    '''Download the gzipped source for ref and unpack it to "<ref>".

    type is not used; it is kept so that all download functions share one
    signature.  The download goes to a temporary "<ref>.dum" file, which is
    removed afterwards, also when unpacking raises.
    '''
    stem = _targetPath(ref, downloadPath)
    temporary = stem + ".dum"
    _urlreq.urlretrieve('http://arxiv.org/e-print/' + ref, temporary)
    try:
        _gunzip(temporary, stem)
    finally:
        os.remove(temporary)


def _sliceAfter(html, marker, skip):
    '''Return html starting skip characters after the start of marker.

    Returns None when marker does not occur, so callers can report a
    missing field instead of slicing from a bogus find() result of -1.
    '''
    start = html.find(marker)
    if start < 0:
        return None
    return html[start + skip:]


def _upTo(text, terminator, trim=0):
    '''Return text up to terminator, dropping trim further characters.

    The whole text is returned when terminator does not occur.
    '''
    end = text.find(terminator)
    if end < 0:
        return text
    return text[:max(end - trim, 0)]


def getTitle(html):
    '''Return the title found in an abstract page, or "no title".'''
    # 15 = the 14-character marker plus the one character that follows it.
    rest = _sliceAfter(html, ">Title:</span>", 15)
    if rest is None:
        return "no title"
    return _upTo(rest, "</h1>")


def getAuthors(html):
    '''Return the author list as plain text, or "no authors".

    Tags and newlines inside the authors block are removed.
    '''
    marker = ">Authors:</span>"
    block = _sliceAfter(html, marker, 0)
    if block is None:
        return "no authors"
    # Restrict the search to the authors block before looking for the first
    # link, so a '">' further down the page cannot be picked up by mistake.
    block = _upTo(block, "</div>")
    firstLink = block.find("\">")
    if firstLink >= 0:
        block = block[firstLink + 2:]
    else:
        block = block[len(marker):]
    block = re.sub('<[^>]*>', '', block)
    return block.replace("\n", "")


def getAbstract(html):
    '''Return the abstract found in an abstract page, or "no abstract".'''
    # 17 = the 16-character marker plus the one character that follows it;
    # trim=1 drops the character directly before the closing tag.
    rest = _sliceAfter(html, "Abstract:</span>", 17)
    if rest is None:
        return "no abstract"
    return _upTo(rest, "</blockquote>", trim=1)


def getComments(html):
    '''Return the comments field of an abstract page, or "no comments".'''
    # Test for the marker that is actually sliced on; the bare word
    # "comments" can occur elsewhere in a page that has no comments field.
    rest = _sliceAfter(html, "comments\">", 10)
    if rest is None:
        return "no comments"
    return _upTo(rest, "</td>")


def getJref(html):
    '''Return the journal reference, or "no journal reference".'''
    # Same reasoning as getComments: look for the marker, not the word.
    rest = _sliceAfter(html, "jref\">", 6)
    if rest is None:
        return "no journal reference"
    return _upTo(rest, "</td>")


def _fetchAbstractPage(ref):
    '''Download the abstract page for ref and return it as a str.'''
    response = _urlreq.urlopen('http://arxiv.org/abs/' + ref)
    try:
        html = response.read()
    finally:
        response.close()
    # Python 3 returns bytes, which the str-based extractors cannot search.
    if not isinstance(html, str):
        html = html.decode('utf-8', 'replace')
    return html


# Command-line flag -> extractor, in the order the fields are printed.
_METADATA_FIELDS = (
    ('-t', getTitle),
    ('-a', getAuthors),
    ('-b', getAbstract),
    ('-c', getComments),
    ('-j', getJref),
)

# Command-line flag -> download function.
_DOWNLOADERS = (
    ('-d', downloadPDF),
    ('-p', downloadPS),
    ('-s', downloadSource),
)


def main(argv=None):
    '''Run the command-line interface and return the process exit status.

    argv defaults to sys.argv[1:].  Returns 0 on success, 2 for a usage
    error (unknown option, or not exactly one reference) and 1 when the
    reference is not of arXiv form, so that calling scripts can detect
    failure.
    '''
    if argv is None:
        argv = sys.argv[1:]
    try:
        options, arguments = getopt.gnu_getopt(argv, 'hatbcjdpsv', ['help'])
    except getopt.error:
        print('error: you tried to use an unknown option or the argument for an option that requires it was missing; try \'arxiv.py -h\' for more information')
        return 2

    # NOTE(review): '-v' is accepted by the option string but nothing acts
    # on it; it only suppresses the "show everything" default below.
    selected = set()
    for option, _value in options:
        if option in ('-h', '--help'):
            print(__doc__)
            return 0
        selected.add(option)
    if not options:
        # No options at all: show every metadata field.
        selected.update(flag for flag, _extract in _METADATA_FIELDS)

    if len(arguments) != 1:
        print('you didn\'t specify an arXiv reference; try \'arxiv.py -h\' for more information')
        return 2

    refType, ref = findRefType(arguments[0])
    print("Reference %s is of type %s" % (ref, refType))
    if refType == _NOT_ARXIV:
        print("type not of arXiv form")
        return 1

    extractors = [extract for flag, extract in _METADATA_FIELDS
                  if flag in selected]
    if extractors:
        # Fetch the abstract page once and only when a field is requested.
        html = _fetchAbstractPage(ref)
        for extract in extractors:
            print(extract(html))

    for flag, download in _DOWNLOADERS:
        if flag in selected:
            download(ref, refType, "")
    return 0


if __name__ == "__main__":
    sys.exit(main())