Difference between revisions of "Spires.py"

From String Theory Wiki
Jump to: navigation, search
m (link to usage instructions)
(use inSPIRE instead of SPIRES - only works for arXiv preprint references at the moment)
Line 5: Line 5:
 
#! /usr/bin/python
 
#! /usr/bin/python
  
## SPIRES script version 0.3
+
## SPIRES script version 0.4
  
## Copyright 2007 Tom Brown
+
## updated for inSPIRE
 +
 
 +
## Copyright 2013 Tom Brown
  
 
## This program is free software; you can redistribute it and/or
 
## This program is free software; you can redistribute it and/or
Line 61: Line 63:
  
  
 +
 +
#\d is a decimal digit; \D is anything but a decimal digit
 
def findRefType(ref):
 
def findRefType(ref):
 
     ref = ref.replace('arxiv:','')
 
     ref = ref.replace('arxiv:','')
     if re.search(r'^[a-zA-Z\-]+/\d{7}$',ref):
+
     if re.search(r'^[a-zA-Z\-\.]+/\d{7}$',ref):
         type = 'old-style eprint'
+
         rType = 'old-style eprint'
 
     elif re.search(r'^\d{7}$',ref):
 
     elif re.search(r'^\d{7}$',ref):
         type = 'old-style eprint'
+
         rType = 'old-style eprint'
 
         ref = 'hep-th/' + ref
 
         ref = 'hep-th/' + ref
 
     elif re.search('^\d{4}\.\d{4}$',ref):
 
     elif re.search('^\d{4}\.\d{4}$',ref):
         type = 'new-style eprint'
+
         rType = 'new-style eprint'
 
     elif re.search(r'^\D+:\d{4}[a-zA-Z]{2,3}$',ref):
 
     elif re.search(r'^\D+:\d{4}[a-zA-Z]{2,3}$',ref):
         type = 'texkey'
+
         rType = 'texkey'
 
     else:
 
     else:
         type = 'journal'
+
         rType = 'journal'
  
     return type, ref
+
     return rType, ref
  
  
  
def getBiBTeX(ref,type):
+
def getBiBTeX(ref,rType):
     if type == 'old-style eprint':
+
     if rType == 'old-style eprint':
         query = 'eprint=' + ref
+
         query = 'p=' + ref
     elif type == 'new-style eprint':
+
     elif rType == 'new-style eprint':
         query = 'eprint=arXiv:' + ref
+
         query = 'p=' + ref
     elif type == 'texkey':
+
     elif rType == 'texkey':
 
         query = 'texkey=' + ref
 
         query = 'texkey=' + ref
     elif type == 'journal':
+
     elif rType == 'journal':
 
         query = 'j=' + ref
 
         query = 'j=' + ref
 
     else:
 
     else:
 
         return "no records were found in SPIRES to match your search, please try again"
 
         return "no records were found in SPIRES to match your search, please try again"
  
     BiBTeX = urllib.urlopen('http://www.slac.stanford.edu/spires/find/hep/wwwbriefbibtex?' + query + '&server=sunspi5').read()
+
    #http://inspirehep.net/search?p=hep-th%2F9711200&of=hx
 +
    #http://inspirehep.net/search?p=1101.0121&of=hx
 +
 
 +
 
 +
     BiBTeX = urllib.urlopen('http://inspirehep.net/search?' + query + '&of=hx').read()
  
 
     if 'No records' in BiBTeX:
 
     if 'No records' in BiBTeX:
 
         return "no records were found in SPIRES to match your search, please try again"
 
         return "no records were found in SPIRES to match your search, please try again"
  
     BiBTeX = BiBTeX[BiBTeX.find('<!-- START RESULTS -->'):]
+
     BiBTeX = BiBTeX[BiBTeX.find('<pre>'):]
 
     BiBTeX = BiBTeX[BiBTeX.find('@'):]
 
     BiBTeX = BiBTeX[BiBTeX.find('@'):]
  
     BiBTeX = BiBTeX[:BiBTeX.rfind('<!-- END RESULTS -->')+1]
+
     BiBTeX = BiBTeX[:BiBTeX.rfind('</pre>')+1]
 
     BiBTeX = BiBTeX[:BiBTeX.rfind('}')+1]
 
     BiBTeX = BiBTeX[:BiBTeX.rfind('}')+1]
 
          
 
          
Line 106: Line 114:
  
  
def getBibitem(ref,type):
+
def getBibitem(ref,rType):
     if type == 'old-style eprint':
+
     if rType == 'old-style eprint':
         query = 'eprint=' + ref
+
         query = 'p=' + ref
     elif type == 'new-style eprint':
+
     elif rType == 'new-style eprint':
         query = 'eprint=arXiv:' + ref
+
         query = 'p=' + ref
     elif type == 'texkey':
+
     elif rType == 'texkey':
 
         query = 'texkey=' + ref
 
         query = 'texkey=' + ref
     elif type == 'journal':
+
     elif rType == 'journal':
 
         query = 'j=' + ref
 
         query = 'j=' + ref
 
     else:
 
     else:
 
         return "no records were found in SPIRES to match your search, please try again"
 
         return "no records were found in SPIRES to match your search, please try again"
  
     bibitem = urllib.urlopen('http://www.slac.stanford.edu/spires/find/hep/wwwbrieflatex2?' + query + '&server=sunspi5').read()
+
     bibitem = urllib.urlopen('http://inspirehep.net/search?' + query + '&of=hlxe').read()
  
 
     if 'No records' in bibitem:
 
     if 'No records' in bibitem:
 
         return "no records were found in SPIRES to match your search, please try again"
 
         return "no records were found in SPIRES to match your search, please try again"
  
     bibitem = bibitem[bibitem.find('<!-- START RESULTS -->'):]
+
     bibitem = bibitem[bibitem.find('<pre>')+5:bibitem.rfind('</pre>')]
    bibitem = bibitem[bibitem.find('%'):]
+
 
      
+
     #treat newlines correctly
     bibitem = bibitem[:bibitem.rfind('<!-- END RESULTS -->')+1]
+
     bibitem = bibitem.replace("<br>","\n").replace("<br />","\n").replace("&nbsp;"," ")
    bibitem = bibitem[:bibitem.rfind('%')+1]
 
  
 
     return bibitem
 
     return bibitem
Line 163: Line 170:
  
  
def downloadeprint(ref,type,downloadPath):
+
def downloadeprint(ref,rType,downloadPath):
 
     downloadPath = os.path.expanduser(downloadPath)
 
     downloadPath = os.path.expanduser(downloadPath)
     if type == 'old-style eprint':
+
     if rType == 'old-style eprint':
 
         urllib.urlretrieve('http://arxiv.org/pdf/' + ref, downloadPath + ref.replace('/','-') + '.pdf')
 
         urllib.urlretrieve('http://arxiv.org/pdf/' + ref, downloadPath + ref.replace('/','-') + '.pdf')
     elif type == 'new-style eprint':
+
     elif rType == 'new-style eprint':
 
         urllib.urlretrieve('http://arxiv.org/pdf/' + ref, downloadPath + ref + '.pdf')
 
         urllib.urlretrieve('http://arxiv.org/pdf/' + ref, downloadPath + ref + '.pdf')
  
Line 178: Line 185:
 
     libraryfile.close()
 
     libraryfile.close()
  
     if cite in library:
+
     if library.count(cite) == 0:
 
         print 'adding BiBTeX entry to ' + BiBTeXlibraryFileName
 
         print 'adding BiBTeX entry to ' + BiBTeXlibraryFileName
 
         #find the end of the file (the second argument means count from
 
         #find the end of the file (the second argument means count from
Line 194: Line 201:
 
     f=open(fileName, 'r')
 
     f=open(fileName, 'r')
 
     data = f.read()
 
     data = f.read()
 +
    f.close()
  
     if r'\begin{thebibliography}' in data:
+
     if data.find(r'\begin{thebibliography}') >= 0:
 
         data = data[:data.find(r'\begin{thebibliography}')]
 
         data = data[:data.find(r'\begin{thebibliography}')]
  
 
     citations = []
 
     citations = []
  
     while r'\cite{' in data:
+
     while(data.find(r'\cite{') >=0):
 
         data = data[data.find(r'\cite{') + 6:]
 
         data = data[data.find(r'\cite{') + 6:]
         if '}' in data:
+
         if(data.find('}') >=0):
 
             citation = data[:data.find('}')]
 
             citation = data[:data.find('}')]
 
             data = data[data.find('}')+1:]
 
             data = data[data.find('}')+1:]
             citation += ','
+
             citation = citation + ','
             while ',' in citation:
+
             while(citation.find(',') >=0):
 
                 if citation[:citation.find(',')] not in citations:
 
                 if citation[:citation.find(',')] not in citations:
 
                     citations.append(citation[:citation.find(',')])
 
                     citations.append(citation[:citation.find(',')])
Line 216: Line 224:
 
if __name__ == "__main__":
 
if __name__ == "__main__":
  
     authorOpt = False
+
     authorOpt = 0
     titleOpt = False
+
     titleOpt = 0
     bibtexOpt = False
+
     bibtexOpt = 0
     bibitemOpt = False
+
     bibitemOpt = 0
     citeOpt = False
+
     citeOpt = 0
     verboseOpt = False
+
     verboseOpt = 0
     libraryOpt = False
+
     libraryOpt = 0
     downloadOpt = False
+
     downloadOpt = 0
  
 
     try:
 
     try:
Line 246: Line 254:
  
 
         elif o == '--download':
 
         elif o == '--download':
             downloadOpt = True
+
             downloadOpt = 1
 
             if a == '':
 
             if a == '':
 
                 a = './'
 
                 a = './'
 
             elif a[-1] != '/':
 
             elif a[-1] != '/':
                 a += '/'
+
                 a = a + '/'
 
             downloadPath = os.path.expanduser(a)
 
             downloadPath = os.path.expanduser(a)
 
             print 'download path is ' + downloadPath
 
             print 'download path is ' + downloadPath
  
 
         elif o == '-b':
 
         elif o == '-b':
             bibtexOpt = True
+
             bibtexOpt = 1
  
 
         elif o == '-i':
 
         elif o == '-i':
             bibitemOpt = True
+
             bibitemOpt = 1
  
 
         elif o == '-a':
 
         elif o == '-a':
             authorOpt = True
+
             authorOpt = 1
  
 
         elif o == '-t':
 
         elif o == '-t':
             titleOpt = True
+
             titleOpt = 1
  
 
         elif o == '-c':
 
         elif o == '-c':
             citeOpt = True
+
             citeOpt = 1
  
 
         elif o == '-e':
 
         elif o == '-e':
             bibtexOpt = True
+
             bibtexOpt = 1
             authorOpt = True
+
             authorOpt = 1
             titleOpt = True
+
             titleOpt = 1
             citeOpt = True
+
             citeOpt = 1
  
 
         elif o == '-v':
 
         elif o == '-v':
             verboseOpt = True
+
             verboseOpt = 1
  
 
     if len(arguments) != 1:
 
     if len(arguments) != 1:
         print "you didn't specify a SPIRES reference; try 'spires.py -h' for more information"
+
         print 'you didn\'t specify a SPIRES reference; try \'spires.py -h\' for more information'
 
         sys.exit(0)
 
         sys.exit(0)
 
     else:
 
     else:
Line 285: Line 293:
  
 
     if len(options) == 0:
 
     if len(options) == 0:
         bibtexOpt = True
+
         bibtexOpt = 1
         authorOpt = True
+
         authorOpt = 1
         titleOpt = True
+
         titleOpt = 1
         citeOpt = True
+
         citeOpt = 1
 
 
  
     type, ref = findRefType(ref)
+
     print ref
 +
    rType, ref = findRefType(ref)
 +
    print rType, ref
  
 
     if verboseOpt:
 
     if verboseOpt:
         print 'the reference ' + ref + ' is a(n) ' + type
+
         print 'the reference ' + ref + ' is a(n) ' + rType
  
     if bibtexOpt or authorOpt or titleOpt or citeOpt or libraryOpt:
+
     if (bibtexOpt + authorOpt + titleOpt + citeOpt + libraryOpt) > 0:
         BiBTeX = getBiBTeX(ref,type)
+
         BiBTeX = getBiBTeX(ref,rType)
 
         if 'no records' in BiBTeX:
 
         if 'no records' in BiBTeX:
 
             print BiBTeX
 
             print BiBTeX
Line 306: Line 315:
  
 
     if bibitemOpt:
 
     if bibitemOpt:
         bibitem = getBibitem(ref,type)
+
         bibitem = getBibitem(ref,rType)
 
         print bibitem
 
         print bibitem
 
         if 'no records' in bibitem:
 
         if 'no records' in bibitem:
Line 319: Line 328:
 
         print title
 
         print title
  
     if citeOpt or libraryOpt:
+
     if (citeOpt + libraryOpt) > 0:
 
         cite = extractcite(BiBTeX)
 
         cite = extractcite(BiBTeX)
  
Line 329: Line 338:
  
 
     if downloadOpt:
 
     if downloadOpt:
         downloadeprint(ref,type,downloadPath)
+
         downloadeprint(ref,rType,downloadPath)
 
</pre>
 
</pre>

Revision as of 19:03, 24 February 2013

Copy the following text into a file called spires.py and save it.

Further usage instructions are available at SPIRES_script.

#! /usr/bin/python

## SPIRES script version 0.4

## updated for inSPIRE

## Copyright 2013 Tom Brown

## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 3 of the
## License, or (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.

## See http://www.stringwiki.org/wiki/SPIRES_script for more usage
## instructions

'''SPIRES script
Usage:
python spires.py reference [ -hbiatcev ] [ --help ] [ --library library.bib ] [ --download download_path/ ]
"reference" must be a standard arXiv reference, e.g. hep-th/9711200, 0705.0303, Maldacena:1997re or a SPIRES journal reference, e.g. CMPHA,43,199
Options:
-h, --help
displays this help message
-b
displays the BiBTeX entry
-i
displays the bibitem entry
-a
displays the author(s)
-t
displays the title
-c
displays the TeX citation key
-e
displays everything
-v
verbose mode

--download download_path/
for arXiv references downloads a pdf of the paper from the arXiv to the directory download_path/
--library library.bib
if it is not already in library.bib, appends the BiBTeX entry to library.bib; use at your own risk
'''

# Module metadata.  __version__ is kept in step with the
# "SPIRES script version" line in the header comment of this file.
__version__ = "0.4"
__author__ = "Tom Brown"
# NOTE(review): the header comment reads "Copyright 2013 Tom Brown" while this
# string still says 2007 -- confirm which year(s) should be stated here.
__copyright__ = "Copyright 2007 Tom Brown, GNU GPL 3"


import sys, os, getopt, re, urllib



#\d is a decimal digit; \D is anything but a decimal digit
def findRefType(ref):
    '''Classify a reference string and normalise it for searching.

    ref -- the reference as typed by the user

    Every "arxiv:" prefix is removed first, whatever its letter case.

    Returns a tuple (rType, ref): rType is one of 'old-style eprint',
    'new-style eprint', 'texkey' or 'journal', and ref is the normalised
    reference.  A bare seven-digit number is taken to be a hep-th eprint
    and gets the 'hep-th/' prefix.  Anything that matches none of the
    patterns is reported as 'journal'.
    '''
    # accept "arXiv:0705.0303" as well as "arxiv:0705.0303"
    ref = re.sub(r'(?i)arxiv:', '', ref)

    if re.search(r'^[a-zA-Z\-\.]+/\d{7}$', ref):
        # archive/YYMMNNN, e.g. hep-th/9711200 or math.AG/0601001
        rType = 'old-style eprint'
    elif re.search(r'^\d{7}$', ref):
        # archive name left out: assume hep-th
        rType = 'old-style eprint'
        ref = 'hep-th/' + ref
    elif re.search(r'^\d{4}\.\d{4,5}$', ref):
        # YYMM.NNNN, or YYMM.NNNNN (five-digit sequence numbers)
        rType = 'new-style eprint'
    elif re.search(r'^\D+:\d{4}[a-zA-Z]{2,3}$', ref):
        # e.g. Maldacena:1997re
        rType = 'texkey'
    else:
        rType = 'journal'

    return rType, ref



def getBiBTeX(ref,rType):
    '''Fetch the BiBTeX entry for a reference from inSPIRE.

    ref   -- reference string, e.g. as returned by findRefType
    rType -- 'old-style eprint', 'new-style eprint', 'texkey' or 'journal'

    Returns the text from the first '@' after the first <pre> tag up to
    the last '}' before the last </pre> tag of the result page.

    If rType is not one of the values above, if the page contains
    'No records', or if no <pre> section holding an entry can be located,
    the "no records were found ..." message is returned instead.
    '''
    notFound = "no records were found in SPIRES to match your search, please try again"

    if rType == 'old-style eprint' or rType == 'new-style eprint':
        field = 'p'
    elif rType == 'texkey':
        # NOTE(review): it is not visible here whether the inSPIRE search
        # URL understands the 'texkey' and 'j' parameters -- confirm.
        field = 'texkey'
    elif rType == 'journal':
        field = 'j'
    else:
        return notFound

    # The reference is percent-encoded so that '/', ':' and ',' cannot be
    # misread as URL syntax, e.g.
    #   http://inspirehep.net/search?p=hep-th%2F9711200&of=hx
    #   http://inspirehep.net/search?p=1101.0121&of=hx
    query = field + '=' + urllib.quote(ref, '')

    page = urllib.urlopen('http://inspirehep.net/search?' + query + '&of=hx').read()

    if 'No records' in page:
        return notFound

    # find() returns -1 when the marker is missing; slicing with that
    # value would silently return junk, so report "not found" instead
    start = page.find('<pre>')
    if start < 0:
        return notFound
    entry = page[start:]

    at = entry.find('@')
    if at < 0:
        return notFound
    entry = entry[at:]

    end = entry.rfind('</pre>')
    if end < 0:
        return notFound
    entry = entry[:end]

    # drop whatever follows the closing brace of the last entry
    entry = entry[:entry.rfind('}')+1]

    return entry


def getBibitem(ref,rType):
    '''Fetch the LaTeX bibitem for a reference from inSPIRE.

    ref   -- reference string, e.g. as returned by findRefType
    rType -- 'old-style eprint', 'new-style eprint', 'texkey' or 'journal'

    Returns the text between the first <pre> tag and the last </pre> tag
    of the result page, with "<br>" and "<br />" turned into newlines and
    "&nbsp;" turned into a space.

    If rType is not one of the values above, if the page contains
    'No records', or if the <pre> section cannot be located, the
    "no records were found ..." message is returned instead.
    '''
    notFound = "no records were found in SPIRES to match your search, please try again"

    if rType == 'old-style eprint' or rType == 'new-style eprint':
        field = 'p'
    elif rType == 'texkey':
        # NOTE(review): it is not visible here whether the inSPIRE search
        # URL understands the 'texkey' and 'j' parameters -- confirm.
        field = 'texkey'
    elif rType == 'journal':
        field = 'j'
    else:
        return notFound

    # percent-encode the reference so that '/', ':' and ',' cannot be
    # misread as URL syntax
    query = field + '=' + urllib.quote(ref, '')

    page = urllib.urlopen('http://inspirehep.net/search?' + query + '&of=hlxe').read()

    if 'No records' in page:
        return notFound

    # find()/rfind() return -1 when the marker is missing; slicing with
    # that value would silently return junk, so report "not found" instead
    start = page.find('<pre>')
    end = page.rfind('</pre>')
    if start < 0 or end < 0:
        return notFound

    # +5 skips over the '<pre>' tag itself
    bibitem = page[start+5:end]

    #treat newlines correctly
    bibitem = bibitem.replace("<br>","\n").replace("<br />","\n").replace("&nbsp;"," ")

    return bibitem


def extractauthor(BiBTeX):
    '''Return the text between the first pair of double quotes that
    follows the first occurrence of 'author' in a BiBTeX entry.

    Every run of white space in the entry is collapsed to a single space
    before searching, so an author list that spans several lines comes
    back on one line.
    '''
    flattened = re.sub(r'\s+', r' ', BiBTeX)

    # everything from the word 'author' onwards
    rest = flattened[flattened.find('author'):]

    # step past the opening quote ...
    openingQuote = rest.find('"')
    rest = rest[openingQuote + 1:]

    # ... and stop just before the closing one
    closingQuote = rest.find('"')
    return rest[:closingQuote]


def extracttitle(BiBTeX):
    '''Return the title found after the first occurrence of 'title' in a
    BiBTeX entry.

    Every run of white space in the entry is collapsed to a single space
    first.  The result is the text between the first pair of double
    quotes after 'title', minus its first and its last character
    (presumably the braces of a field written as title = "{...}").
    '''
    flattened = re.sub(r'\s+', r' ', BiBTeX)

    # everything from the word 'title' onwards
    rest = flattened[flattened.find('title'):]

    # skip the opening quote and the character right after it
    openingQuote = rest.find('"')
    rest = rest[openingQuote + 2:]

    # stop one character short of the closing quote
    closingQuote = rest.find('"')
    return rest[:closingQuote - 1]


def extractcite(BiBTeX):
    '''Return the citation key of a BiBTeX entry.

    The key is taken to be the text after the first opening brace and
    before the first comma of the entry.
    '''
    keyStart = BiBTeX.find('{') + 1
    keyEnd = BiBTeX.find(',')
    return BiBTeX[keyStart:keyEnd]


def downloadeprint(ref,rType,downloadPath):
    '''Download the pdf of an arXiv eprint into downloadPath.

    ref          -- arXiv identifier
    rType        -- reference type; nothing happens unless it is
                    'old-style eprint' or 'new-style eprint'
    downloadPath -- prefix for the saved file name; '~' is expanded.  It
                    is joined to the file name by plain concatenation, so
                    it has to end with a path separator already.

    Old-style identifiers have their '/' replaced by '-' in the file name.
    '''
    targetPrefix = os.path.expanduser(downloadPath)

    if rType == 'old-style eprint':
        # 'hep-th/9711200' cannot be a file name as it stands
        localName = ref.replace('/','-')
    elif rType == 'new-style eprint':
        localName = ref
    else:
        # not an arXiv reference: nothing to download
        return

    urllib.urlretrieve('http://arxiv.org/pdf/' + ref, targetPrefix + localName + '.pdf')


def updatelibrary(cite,BiBTeX,BiBTeXlibraryFileName):
    '''Append a BiBTeX entry to a library file unless its key is already there.

    cite                  -- citation key of the entry
    BiBTeX                -- full text of the entry
    BiBTeXlibraryFileName -- path of the library file; '~' is expanded

    The key counts as present when it occurs anywhere in the file as a
    substring.  A library file that does not exist yet is treated as
    empty, so the first entry creates it.  A line saying what was done is
    printed.
    '''
    BiBTeXlibraryFileName = os.path.expanduser(BiBTeXlibraryFileName)

    library = ''
    if os.path.exists(BiBTeXlibraryFileName):
        libraryfile = open(BiBTeXlibraryFileName, 'r')
        try:
            library = libraryfile.read()
        finally:
            # close the file even if reading fails
            libraryfile.close()

    # print(...) with one parenthesised argument produces the same output
    # whether print is a statement or a function
    if cite in library:
        print('BiBTeX entry already in library')
        return

    print('adding BiBTeX entry to ' + BiBTeXlibraryFileName)
    # append mode: the entry is written at the end of the file
    libraryfile = open(BiBTeXlibraryFileName, 'a')
    try:
        libraryfile.write('\n' + BiBTeX + '\n')
    finally:
        libraryfile.close()


def listCitations(fileName):
    r'''Return the distinct citation keys used in a TeX file.

    fileName -- path of the TeX file; '~' is expanded

    Only the text before the first \begin{thebibliography} is scanned.
    Each \cite{...} may hold several comma-separated keys.  White space
    around a key is stripped and empty keys are skipped, so that
    \cite{a, b} yields 'a' and 'b'.  Keys are returned in order of first
    appearance, without duplicates.
    '''
    fileName = os.path.expanduser(fileName)
    f = open(fileName, 'r')
    try:
        data = f.read()
    finally:
        # close the file even if reading fails
        f.close()

    # citations inside the bibliography itself do not count
    bibliographyStart = data.find(r'\begin{thebibliography}')
    if bibliographyStart >= 0:
        data = data[:bibliographyStart]

    citations = []
    # the group captures everything up to the first '}' after '\cite{'
    for keyList in re.findall(r'\\cite\{([^}]*)\}', data):
        for key in keyList.split(','):
            key = key.strip()
            if key and key not in citations:
                citations.append(key)

    return citations


if __name__ == "__main__":

    # Command-line driver: parse the options, classify the reference,
    # fetch what was asked for and print it.
    #
    # print(...) is always called with one parenthesised string, which
    # produces the same output whether print is a statement or a function.

    # option flags, 0 = off, 1 = on
    authorOpt = 0
    titleOpt = 0
    bibtexOpt = 0
    bibitemOpt = 0
    citeOpt = 0
    verboseOpt = 0
    libraryOpt = 0
    downloadOpt = 0

    try:
        # gnu_getopt lets options and the reference appear in any order
        options, arguments = getopt.gnu_getopt(sys.argv[1:],
        'hbiatcev', ['help','library=','download='])
    except getopt.error:
        print('error: you tried to use an unknown option or the argument for an option that requires it was missing; try \'spires.py -h\' for more information')
        sys.exit(0)

    for o,a in options:
        if o in ('-h','--help'):
            print(__doc__)
            sys.exit(0)

        elif o == '--library':
            # '--library=' with nothing after the equals sign
            if a == '':
                print('--library expects an argument')
                sys.exit(0)
            libraryOpt = 1
            BiBTeXlibraryFileName = os.path.expanduser(a)
            print('library file name is ' + BiBTeXlibraryFileName)

        elif o == '--download':
            downloadOpt = 1
            # default to the current directory and make sure the path ends
            # with '/', because downloadeprint appends the file name directly
            if a == '':
                a = './'
            elif a[-1] != '/':
                a = a + '/'
            downloadPath = os.path.expanduser(a)
            print('download path is ' + downloadPath)

        elif o == '-b':
            bibtexOpt = 1

        elif o == '-i':
            bibitemOpt = 1

        elif o == '-a':
            authorOpt = 1

        elif o == '-t':
            titleOpt = 1

        elif o == '-c':
            citeOpt = 1

        elif o == '-e':
            # "everything": BiBTeX entry, author, title and citation key
            bibtexOpt = 1
            authorOpt = 1
            titleOpt = 1
            citeOpt = 1

        elif o == '-v':
            verboseOpt = 1

    if len(arguments) != 1:
        print('you didn\'t specify a SPIRES reference; try \'spires.py -h\' for more information')
        sys.exit(0)
    else:
        ref = arguments[0]

    # with no options at all behave as if -e had been given
    if len(options) == 0:
        bibtexOpt = 1
        authorOpt = 1
        titleOpt = 1
        citeOpt = 1

    # NOTE(review): the next two prints run whatever options were given and
    # repeat what verbose mode reports below; they look like debugging
    # output -- confirm whether they should be limited to verbose mode.
    print(ref)
    rType, ref = findRefType(ref)
    print(rType + ' ' + ref)

    if verboseOpt:
        print('the reference ' + ref + ' is a(n) ' + rType)

    # the BiBTeX entry is the source for the author, title and citation key
    # and for the library update, so fetch it once if any of them is wanted
    if bibtexOpt or authorOpt or titleOpt or citeOpt or libraryOpt:
        BiBTeX = getBiBTeX(ref,rType)
        if 'no records' in BiBTeX:
            print(BiBTeX)
            sys.exit(0)

    if bibtexOpt:
        print(BiBTeX)

    if bibitemOpt:
        bibitem = getBibitem(ref,rType)
        print(bibitem)
        if 'no records' in bibitem:
            sys.exit(0)

    if authorOpt:
        author = extractauthor(BiBTeX)
        print(author)

    if titleOpt:
        title = extracttitle(BiBTeX)
        print(title)

    if citeOpt or libraryOpt:
        cite = extractcite(BiBTeX)

    if citeOpt:
        print('\\cite{' + cite + '}')

    if libraryOpt:
        updatelibrary(cite,BiBTeX,BiBTeXlibraryFileName)

    if downloadOpt:
        downloadeprint(ref,rType,downloadPath)

</pre>