Spires.py: Difference between revisions
Jump to navigation
Jump to search
(take account of curly brackets in SPIRES BiBTeX titles) |
(use old API after new INSPIRE release) |
||
| (9 intermediate revisions by 2 users not shown) | |||
| Line 1: | Line 1: | ||
Copy the following text into a file called spires.py | Copy the following text into a file called spires.py and save it. | ||
Further usage instructions are available at [[SPIRES_script]]. | |||
<pre> | <pre> | ||
#! /usr/bin/python | #! /usr/bin/python | ||
## SPIRES script version 0. | ## SPIRES script version 0.6 | ||
## updated for inSPIRE | |||
## Copyright | ## Copyright 2015 Tom Brown | ||
## This program is free software; you can redistribute it and/or | ## This program is free software; you can redistribute it and/or | ||
| Line 51: | Line 55: | ||
''' | ''' | ||
__version__ = "0. | __version__ = "0.6" | ||
__author__ = "Tom Brown" | __author__ = "Tom Brown" | ||
__copyright__ = "Copyright | __copyright__ = "Copyright 2015 Tom Brown, GNU GPL 3" | ||
| Line 59: | Line 63: | ||
#\d is a decimal digit; \D is anything but a decimal digit | |||
def findRefType(ref): | def findRefType(ref): | ||
ref = ref.replace('arxiv:','') | ref = ref.replace('arxiv:','') | ||
if re.search(r'^[a-zA-Z\-]+/\d{7}$',ref): | if re.search(r'^[a-zA-Z\-\.]+/\d{7}$',ref): | ||
rType = 'old-style eprint' | |||
elif re.search(r'^\d{7}$',ref): | elif re.search(r'^\d{7}$',ref): | ||
rType = 'old-style eprint' | |||
ref = 'hep-th/' + ref | ref = 'hep-th/' + ref | ||
elif re.search('^\d{4}\.\d{4}$',ref): | elif re.search('^\d{4}\.\d{4,5}$',ref): | ||
rType = 'new-style eprint' | |||
elif re.search(r'^\D+:\d{4}[a-zA-Z]{2}$',ref): | elif re.search(r'^\D+:\d{4}[a-zA-Z]{2,3}$',ref): | ||
rType = 'texkey' | |||
else: | else: | ||
rType = 'journal' | |||
return | return rType, ref | ||
def getBiBTeX(ref, | def getBiBTeX(ref,rType): | ||
if | if rType == 'old-style eprint': | ||
query = 'eprint | query = 'p=find+eprint+' + ref | ||
elif | elif rType == 'new-style eprint': | ||
query = 'eprint | query = 'p=find+eprint+' + ref | ||
elif | elif rType == 'texkey': | ||
query = 'texkey=' + ref | query = 'texkey=' + ref | ||
elif | elif rType == 'journal': | ||
query = 'j=' + ref | query = 'j=' + ref | ||
else: | else: | ||
return "no records were found in SPIRES to match your search, please try again" | return "no records were found in SPIRES to match your search, please try again" | ||
BiBTeX = urllib.urlopen('http:// | #http://inspirehep.net/search?p=hep-th%2F9711200&of=hx | ||
#http://inspirehep.net/search?p=1101.0121&of=hx | |||
BiBTeX = urllib.urlopen('http://old.inspirehep.net/search?' + query + '&of=hx').read() | |||
if 'No records' in BiBTeX: | if 'No records' in BiBTeX: | ||
return "no records were found in SPIRES to match your search, please try again" | return "no records were found in SPIRES to match your search, please try again" | ||
BiBTeX = BiBTeX[BiBTeX.find('< | BiBTeX = BiBTeX[BiBTeX.find('<pre>'):] | ||
BiBTeX = BiBTeX[BiBTeX.find('@'):] | BiBTeX = BiBTeX[BiBTeX.find('@'):] | ||
BiBTeX = BiBTeX[:BiBTeX.rfind(' | BiBTeX = BiBTeX[:BiBTeX.rfind('/pre>')] | ||
BiBTeX = BiBTeX[:BiBTeX.rfind('}')+1] | BiBTeX = BiBTeX[:BiBTeX.rfind('}')+1] | ||
| Line 104: | Line 114: | ||
def getBibitem(ref, | def getBibitem(ref,rType): | ||
if | if rType == 'old-style eprint': | ||
query = 'eprint | query = 'p=find+eprint+' + ref | ||
elif | elif rType == 'new-style eprint': | ||
query = 'eprint | query = 'p=find+eprint+' + ref | ||
elif | elif rType == 'texkey': | ||
query = 'texkey=' + ref | query = 'texkey=' + ref | ||
elif | elif rType == 'journal': | ||
query = 'j=' + ref | query = 'j=' + ref | ||
else: | else: | ||
return "no records were found in SPIRES to match your search, please try again" | return "no records were found in SPIRES to match your search, please try again" | ||
bibitem = urllib.urlopen('http:// | bibitem = urllib.urlopen('http://old.inspirehep.net/search?' + query + '&of=hlxe').read() | ||
if 'No records' in bibitem: | if 'No records' in bibitem: | ||
return "no records were found in SPIRES to match your search, please try again" | return "no records were found in SPIRES to match your search, please try again" | ||
bibitem = bibitem[bibitem.find('< | bibitem = bibitem[bibitem.find('<pre>')+5:bibitem.rfind('/pre>')-1] | ||
#treat newlines correctly | |||
bibitem = bibitem | bibitem = bibitem.replace("<br>","\n").replace("<br />","\n").replace("&nbsp;"," ") | ||
return bibitem | return bibitem | ||
| Line 161: | Line 170: | ||
def downloadeprint(ref, | def downloadeprint(ref,rType,downloadPath): | ||
downloadPath = os.path.expanduser(downloadPath) | downloadPath = os.path.expanduser(downloadPath) | ||
if | if rType == 'old-style eprint': | ||
urllib.urlretrieve('http://arxiv.org/pdf/' + ref, downloadPath + ref.replace('/','-') + '.pdf') | urllib.urlretrieve('http://arxiv.org/pdf/' + ref, downloadPath + ref.replace('/','-') + '.pdf') | ||
elif | elif rType == 'new-style eprint': | ||
urllib.urlretrieve('http://arxiv.org/pdf/' + ref, downloadPath + ref + '.pdf') | urllib.urlretrieve('http://arxiv.org/pdf/' + ref, downloadPath + ref + '.pdf') | ||
| Line 192: | Line 201: | ||
f=open(fileName, 'r') | f=open(fileName, 'r') | ||
data = f.read() | data = f.read() | ||
f.close() | |||
if data.find(r'\begin{thebibliography}') >= 0: | if data.find(r'\begin{thebibliography}') >= 0: | ||
| Line 288: | Line 298: | ||
citeOpt = 1 | citeOpt = 1 | ||
print ref | |||
rType, ref = findRefType(ref) | |||
print rType, ref | |||
if verboseOpt: | if verboseOpt: | ||
print 'the reference ' + ref + ' is a(n) ' + | print 'the reference ' + ref + ' is a(n) ' + rType | ||
if (bibtexOpt + authorOpt + titleOpt + citeOpt + libraryOpt) > 0: | if (bibtexOpt + authorOpt + titleOpt + citeOpt + libraryOpt) > 0: | ||
BiBTeX = getBiBTeX(ref, | BiBTeX = getBiBTeX(ref,rType) | ||
if 'no records' in BiBTeX: | if 'no records' in BiBTeX: | ||
print BiBTeX | print BiBTeX | ||
| Line 304: | Line 315: | ||
if bibitemOpt: | if bibitemOpt: | ||
bibitem = getBibitem(ref, | bibitem = getBibitem(ref,rType) | ||
print bibitem | print bibitem | ||
if 'no records' in bibitem: | if 'no records' in bibitem: | ||
| Line 327: | Line 338: | ||
if downloadOpt: | if downloadOpt: | ||
downloadeprint(ref, | downloadeprint(ref,rType,downloadPath) | ||
</pre> | </pre> | ||
Latest revision as of 10:51, 31 March 2020
Copy the following text into a file called spires.py and save it.
Further usage instructions are available at SPIRES_script.
#! /usr/bin/python
## SPIRES script version 0.6
## updated for inSPIRE
## Copyright 2015 Tom Brown
## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 3 of the
## License, or (at your option) any later version.
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
## See http://www.stringwiki.org/wiki/SPIRES_script for more usage
## instructions
'''SPIRES script
Usage:
python spires.py reference [ -hbiatcev ] [ --help ] [ --library library.bib ] [ --download download_path/ ]
"reference" must be a standard arXiv reference, e.g. hep-th/9711200, 0705.0303, Maldacena:1997re or a SPIRES journal reference, e.g. CMPHA,43,199
Options:
-h, --help
displays this help message
-b
displays the BiBTeX entry
-i
displays the bibitem entry
-a
displays the author(s)
-t
displays the title
-c
displays the TeX citation key
-e
displays everything
-v
verbose mode
--download download_path/
for arXiv references downloads a pdf of the paper from the arXiv to the directory download_path/
--library library.bib
if it is not already in library.bib, appends the BiBTeX entry to library.bib; use at your own risk
'''
__version__ = "0.6"
__author__ = "Tom Brown"
__copyright__ = "Copyright 2015 Tom Brown, GNU GPL 3"
import sys, os, getopt, re, urllib
#\d is a decimal digit; \D is anything but a decimal digit
def findRefType(ref):
    '''Classify a reference string and normalise it for querying.

    ref is the reference as typed by the user.  An "arXiv:" prefix is
    removed whatever its capitalisation ("arXiv:", "arxiv:", ...), so
    the usual "arXiv:0705.0303" form is recognised as an eprint.

    Returns a tuple (rType, ref) where rType is one of
    'old-style eprint', 'new-style eprint', 'texkey' or 'journal' and
    ref is the normalised reference.  A bare seven-digit number is
    treated as an old-style eprint and gets 'hep-th/' prepended.
    Anything that matches none of the patterns is reported as 'journal'.
    '''
    # (?i) makes the prefix match case-insensitive; as before, every
    # occurrence of the prefix is removed
    ref = re.sub(r'(?i)arxiv:', '', ref)
    if re.search(r'^[a-zA-Z\-\.]+/\d{7}$', ref):
        # archive/NNNNNNN, e.g. hep-th/9711200 or math.AG/0601001
        rType = 'old-style eprint'
    elif re.search(r'^\d{7}$', ref):
        # seven digits with no archive name: hep-th is assumed
        rType = 'old-style eprint'
        ref = 'hep-th/' + ref
    elif re.search(r'^\d{4}\.\d{4,5}$', ref):
        # YYMM.NNNN or YYMM.NNNNN
        rType = 'new-style eprint'
    elif re.search(r'^\D+:\d{4}[a-zA-Z]{2,3}$', ref):
        # Name:YYYYxx or Name:YYYYxxx, e.g. Maldacena:1997re
        rType = 'texkey'
    else:
        rType = 'journal'
    return rType, ref
def getBiBTeX(ref,rType):
    '''Fetch the BiBTeX entry for a reference from old.inspirehep.net.

    ref and rType are the normalised reference and its type as returned
    by findRefType.

    Returns the BiBTeX entry, starting at its '@' and ending at its
    last '}'.  If rType is not recognised, the server answers
    'No records', or no entry can be located in the returned page, the
    string "no records were found in SPIRES ..." is returned instead;
    callers detect failure by looking for 'no records' in the result.
    '''
    notFound = "no records were found in SPIRES to match your search, please try again"
    if rType in ('old-style eprint', 'new-style eprint'):
        query = 'p=find+eprint+' + ref
    elif rType == 'texkey':
        query = 'texkey=' + ref
    elif rType == 'journal':
        query = 'j=' + ref
    else:
        return notFound
    #http://inspirehep.net/search?p=hep-th%2F9711200&of=hx
    #http://inspirehep.net/search?p=1101.0121&of=hx
    response = urllib.urlopen('http://old.inspirehep.net/search?' + query + '&of=hx')
    try:
        page = response.read()
    finally:
        # always release the connection, even if read() fails
        response.close()
    if 'No records' in page:
        return notFound
    # find() returns -1 when the marker is missing; slicing with -1
    # would silently yield garbage, so treat that as "nothing found"
    preStart = page.find('<pre>')
    if preStart < 0:
        return notFound
    entry = page[preStart:]
    entryStart = entry.find('@')
    if entryStart < 0:
        return notFound
    entry = entry[entryStart:]
    preEnd = entry.rfind('/pre>')
    if preEnd >= 0:
        entry = entry[:preEnd]
    # the entry ends at its last closing brace
    entryEnd = entry.rfind('}')
    if entryEnd < 0:
        return notFound
    return entry[:entryEnd+1]
def getBibitem(ref,rType):
    '''Fetch the LaTeX bibitem for a reference from old.inspirehep.net.

    ref and rType are the normalised reference and its type as returned
    by findRefType.

    Returns the text found inside the page's <pre> block, with HTML
    line breaks turned into newlines and "&nbsp;" entities turned into
    spaces.  If rType is not recognised, the server answers
    'No records', or the page has no <pre> block, the string
    "no records were found in SPIRES ..." is returned instead; callers
    detect failure by looking for 'no records' in the result.
    '''
    notFound = "no records were found in SPIRES to match your search, please try again"
    if rType in ('old-style eprint', 'new-style eprint'):
        query = 'p=find+eprint+' + ref
    elif rType == 'texkey':
        query = 'texkey=' + ref
    elif rType == 'journal':
        query = 'j=' + ref
    else:
        return notFound
    response = urllib.urlopen('http://old.inspirehep.net/search?' + query + '&of=hlxe')
    try:
        page = response.read()
    finally:
        # always release the connection, even if read() fails
        response.close()
    if 'No records' in page:
        return notFound
    preStart = page.find('<pre>')
    preEnd = page.rfind('/pre>')
    # without both markers the slice below would return garbage
    if preStart < 0 or preEnd < 0:
        return notFound
    # skip the five characters of '<pre>' and stop before the '<' of '</pre>'
    bibitem = page[preStart+5:preEnd-1]
    #treat newlines correctly
    bibitem = bibitem.replace("<br>","\n").replace("<br />","\n").replace("&nbsp;"," ")
    return bibitem
def extractauthor(BiBTeX):
    '''Return the text between the first pair of double quotes that
    follows the first occurrence of 'author' in a BiBTeX entry.

    Runs of whitespace (including newlines) are collapsed to a single
    space first, so a multi-line author list comes back on one line.
    '''
    flattened = re.sub(r'\s+', r' ', BiBTeX)
    fromField = flattened[flattened.find('author'):]
    # step past the opening quote of the field value
    openQuote = fromField.find('"')
    value = fromField[openQuote + 1:]
    # and stop just before the closing quote
    closeQuote = value.find('"')
    return value[:closeQuote]
def extracttitle(BiBTeX):
    '''Return the title of a BiBTeX entry.

    The value of the double-quoted 'title' field is returned with runs
    of whitespace collapsed to single spaces.  A single pair of curly
    brackets wrapping the whole title is removed when present; a title
    without them is returned unchanged.  Fields whose names merely end
    in 'title' (e.g. 'booktitle') are ignored.  Returns '' when the
    entry has no such title field.
    '''
    # remove excess white space and replace with a single space
    data = re.sub(r'\s+', ' ', BiBTeX)
    # The lookbehind stops 'booktitle' from matching.  The value ends at
    # a quote that is followed by ',', '}' or the end of the text, so a
    # LaTeX accent such as \"o inside the title does not cut it short.
    match = re.search(r'(?<![A-Za-z])title\s*=\s*"(.*?)"\s*(?:,|\}|$)', data)
    if match is None:
        return ''
    title = match.group(1)
    # strip the protective curly brackets only if they are really there
    if title.startswith('{') and title.endswith('}'):
        title = title[1:-1]
    return title
def extractcite(BiBTeX):
    '''Return the citation key of a BiBTeX entry: the text between the
    first '{' and the first ','.'''
    keyStart = BiBTeX.find('{') + 1
    keyEnd = BiBTeX.find(',')
    return BiBTeX[keyStart:keyEnd]
def downloadeprint(ref,rType,downloadPath):
    '''Download the pdf of an arXiv eprint into downloadPath.

    The file is saved as downloadPath + name + '.pdf', where name is
    ref with any '/' replaced by '-' for old-style eprints; the two
    are concatenated directly, so downloadPath should end with a path
    separator.  Nothing is done for reference types other than
    'old-style eprint' and 'new-style eprint'.
    '''
    targetDir = os.path.expanduser(downloadPath)
    if rType == 'old-style eprint':
        # a '/' in the reference cannot be part of a file name
        localName = ref.replace('/','-')
    elif rType == 'new-style eprint':
        localName = ref
    else:
        # not an arXiv reference: nothing to download
        return
    urllib.urlretrieve('http://arxiv.org/pdf/' + ref, targetDir + localName + '.pdf')
def updatelibrary(cite,BiBTeX,BiBTeXlibraryFileName):
    '''Append a BiBTeX entry to a library file unless it is already there.

    cite is the citation key, BiBTeX the full entry and
    BiBTeXlibraryFileName the path of the library ('~' is expanded).
    The entry counts as present when cite occurs anywhere in the
    library text.  A library file that does not exist yet is treated
    as empty and is created by the append.
    '''
    BiBTeXlibraryFileName = os.path.expanduser(BiBTeXlibraryFileName)
    if os.path.exists(BiBTeXlibraryFileName):
        with open(BiBTeXlibraryFileName, 'r') as libraryfile:
            library = libraryfile.read()
    else:
        # first use: there is nothing to compare against yet
        library = ''
    if cite not in library:
        print('adding BiBTeX entry to ' + BiBTeXlibraryFileName)
        # append mode writes at the end of the file and creates the
        # file if it is missing
        with open(BiBTeXlibraryFileName, 'a') as libraryfile:
            libraryfile.write('\n' + BiBTeX + '\n')
    else:
        print('BiBTeX entry already in library')
def listCitations(fileName):
    r'''List the citation keys used by \cite{...} commands in a TeX file.

    fileName is the path of the TeX file ('~' is expanded).  Only the
    text before the first \begin{thebibliography} is scanned.  Each
    \cite{...} argument is split at commas; surrounding whitespace is
    stripped from every key and empty keys are skipped.

    Returns the keys as a list, without duplicates, in order of first
    appearance.
    '''
    fileName = os.path.expanduser(fileName)
    with open(fileName, 'r') as f:
        data = f.read()
    bibStart = data.find(r'\begin{thebibliography}')
    if bibStart >= 0:
        data = data[:bibStart]
    citations = []
    # each match is the raw text between '\cite{' and the next '}'
    for group in re.findall(r'\\cite\{([^}]*)\}', data):
        for key in group.split(','):
            # '\cite{a, b}' and keys continued on the next line carry
            # whitespace that is not part of the key
            key = key.strip()
            if key and key not in citations:
                citations.append(key)
    return citations
if __name__ == "__main__":
    # Command-line driver: parse the options, classify the reference,
    # fetch what was asked for and print it.  Every failure path exits
    # with status 1 so that calling scripts can detect it; only -h
    # and normal completion exit with status 0.

    # output-selection flags, switched on by the options parsed below
    authorOpt = 0
    titleOpt = 0
    bibtexOpt = 0
    bibitemOpt = 0
    citeOpt = 0
    verboseOpt = 0
    libraryOpt = 0
    downloadOpt = 0
    try:
        # gnu_getopt lets options appear before or after the reference
        options, arguments = getopt.gnu_getopt(sys.argv[1:],
            'hbiatcev', ['help','library=','download='])
    except getopt.error:
        print('error: you tried to use an unknown option or the argument for an option that requires it was missing; try \'spires.py -h\' for more information')
        sys.exit(1)
    for o,a in options:
        if o in ('-h','--help'):
            print(__doc__)
            sys.exit(0)
        elif o == '--library':
            if a == '':
                print('--library expects an argument')
                sys.exit(1)
            libraryOpt = 1
            BiBTeXlibraryFileName = os.path.expanduser(a)
            print('library file name is ' + BiBTeXlibraryFileName)
        elif o == '--download':
            downloadOpt = 1
            # downloadeprint concatenates path and file name, so the
            # path must end with a '/'
            if a == '':
                a = './'
            elif a[-1] != '/':
                a = a + '/'
            downloadPath = os.path.expanduser(a)
            print('download path is ' + downloadPath)
        elif o == '-b':
            bibtexOpt = 1
        elif o == '-i':
            bibitemOpt = 1
        elif o == '-a':
            authorOpt = 1
        elif o == '-t':
            titleOpt = 1
        elif o == '-c':
            citeOpt = 1
        elif o == '-e':
            bibtexOpt = 1
            authorOpt = 1
            titleOpt = 1
            citeOpt = 1
        elif o == '-v':
            verboseOpt = 1
    if len(arguments) != 1:
        print('you didn\'t specify a SPIRES reference; try \'spires.py -h\' for more information')
        sys.exit(1)
    else:
        ref=arguments[0]
    # with no options at all, behave as if -e had been given
    if len(options) == 0:
        bibtexOpt = 1
        authorOpt = 1
        titleOpt = 1
        citeOpt = 1
    # NOTE(review): the next two prints are unconditional and look like
    # left-over debugging output (the -v message below repeats them);
    # kept so that the script's output is unchanged
    print(ref)
    rType, ref = findRefType(ref)
    print(rType + ' ' + ref)
    if verboseOpt:
        print('the reference ' + ref + ' is a(n) ' + rType)
    # the BiBTeX entry is the source for author, title, cite key and library
    if (bibtexOpt + authorOpt + titleOpt + citeOpt + libraryOpt) > 0:
        BiBTeX = getBiBTeX(ref,rType)
        if 'no records' in BiBTeX:
            print(BiBTeX)
            sys.exit(1)
    if bibtexOpt:
        print(BiBTeX)
    if bibitemOpt:
        bibitem = getBibitem(ref,rType)
        print(bibitem)
        if 'no records' in bibitem:
            sys.exit(1)
    if authorOpt:
        author = extractauthor(BiBTeX)
        print(author)
    if titleOpt:
        title = extracttitle(BiBTeX)
        print(title)
    if (citeOpt + libraryOpt) > 0:
        cite = extractcite(BiBTeX)
        if citeOpt:
            print('\\cite{' + cite + '}')
        if libraryOpt:
            updatelibrary(cite,BiBTeX,BiBTeXlibraryFileName)
    if downloadOpt:
        downloadeprint(ref,rType,downloadPath)