Spires.py: Difference between revisions

From String Theory Wiki
Jump to navigation Jump to search
No edit summary
 
(use old API after new INSPIRE release)
 
(10 intermediate revisions by 2 users not shown)
Line 1: Line 1:
Copy the following text into a file called spires.py with your favourite text editor
Copy the following text into a file called spires.py and save it.
 
Further usage instructions are available at [[SPIRES_script]].
<pre>
<pre>
#! /usr/bin/python
#! /usr/bin/python


## SPIRES script version 0.3
## SPIRES script version 0.6
 
## updated for inSPIRE


## Copyright 2007 Tom Brown
## Copyright 2015 Tom Brown


## This program is free software; you can redistribute it and/or
## This program is free software; you can redistribute it and/or
Line 51: Line 55:
'''
'''


__version__ = "0.2"
__version__ = "0.6"
__author__ = "Tom Brown"
__author__ = "Tom Brown"
__copyright__ = "Copyright 2007 Tom Brown, GNU GPL 3"
__copyright__ = "Copyright 2015 Tom Brown, GNU GPL 3"




Line 59: Line 63:




#\d is a decimal digit; \D is anything but a decimal digit
def findRefType(ref):
def findRefType(ref):
     ref = ref.replace('arxiv:','')
     ref = ref.replace('arxiv:','')
     if re.search(r'^[a-zA-Z\-]+/\d{7}$',ref):
     if re.search(r'^[a-zA-Z\-\.]+/\d{7}$',ref):
         type = 'old-style eprint'
         rType = 'old-style eprint'
     elif re.search(r'^\d{7}$',ref):
     elif re.search(r'^\d{7}$',ref):
         type = 'old-style eprint'
         rType = 'old-style eprint'
         ref = 'hep-th/' + ref
         ref = 'hep-th/' + ref
     elif re.search('^\d{4}\.\d{4}$',ref):
     elif re.search('^\d{4}\.\d{4,5}$',ref):
         type = 'new-style eprint'
         rType = 'new-style eprint'
     elif re.search(r'^\D+:\d{4}[a-zA-Z]{2}$',ref):
     elif re.search(r'^\D+:\d{4}[a-zA-Z]{2,3}$',ref):
         type = 'texkey'
         rType = 'texkey'
     else:
     else:
         type = 'journal'
         rType = 'journal'


     return type, ref
     return rType, ref






def getBiBTeX(ref,type):
def getBiBTeX(ref,rType):
     if type == 'old-style eprint':
     if rType == 'old-style eprint':
         query = 'eprint=' + ref
         query = 'p=find+eprint+' + ref
     elif type == 'new-style eprint':
     elif rType == 'new-style eprint':
         query = 'eprint=arXiv:' + ref
         query = 'p=find+eprint+' + ref
     elif type == 'texkey':
     elif rType == 'texkey':
         query = 'texkey=' + ref
         query = 'texkey=' + ref
     elif type == 'journal':
     elif rType == 'journal':
         query = 'j=' + ref
         query = 'j=' + ref
     else:
     else:
         return "no records were found in SPIRES to match your search, please try again"
         return "no records were found in SPIRES to match your search, please try again"


     BiBTeX = urllib.urlopen('http://www.slac.stanford.edu/spires/find/hep/wwwbriefbibtex?' + query + '&server=sunspi5').read()
    #http://inspirehep.net/search?p=hep-th%2F9711200&of=hx
    #http://inspirehep.net/search?p=1101.0121&of=hx
 
 
     BiBTeX = urllib.urlopen('http://old.inspirehep.net/search?' + query + '&of=hx').read()


     if 'No records' in BiBTeX:
     if 'No records' in BiBTeX:
         return "no records were found in SPIRES to match your search, please try again"
         return "no records were found in SPIRES to match your search, please try again"


     BiBTeX = BiBTeX[BiBTeX.find('<!-- START RESULTS -->'):]
     BiBTeX = BiBTeX[BiBTeX.find('<pre>'):]
     BiBTeX = BiBTeX[BiBTeX.find('@'):]
     BiBTeX = BiBTeX[BiBTeX.find('@'):]


     BiBTeX = BiBTeX[:BiBTeX.rfind('<!-- END RESULTS -->')+1]
     BiBTeX = BiBTeX[:BiBTeX.rfind('/pre>')]
     BiBTeX = BiBTeX[:BiBTeX.rfind('}')+1]
     BiBTeX = BiBTeX[:BiBTeX.rfind('}')+1]
          
          
Line 104: Line 114:




def getBibitem(ref,type):
def getBibitem(ref,rType):
     if type == 'old-style eprint':
     if rType == 'old-style eprint':
         query = 'eprint=' + ref
         query = 'p=find+eprint+' + ref
     elif type == 'new-style eprint':
     elif rType == 'new-style eprint':
         query = 'eprint=arXiv:' + ref
         query = 'p=find+eprint+' + ref
     elif type == 'texkey':
     elif rType == 'texkey':
         query = 'texkey=' + ref
         query = 'texkey=' + ref
     elif type == 'journal':
     elif rType == 'journal':
         query = 'j=' + ref
         query = 'j=' + ref
     else:
     else:
         return "no records were found in SPIRES to match your search, please try again"
         return "no records were found in SPIRES to match your search, please try again"


     bibitem = urllib.urlopen('http://www.slac.stanford.edu/spires/find/hep/wwwbrieflatex2?' + query + '&server=sunspi5').read()
     bibitem = urllib.urlopen('http://old.inspirehep.net/search?' + query + '&of=hlxe').read()


     if 'No records' in bibitem:
     if 'No records' in bibitem:
         return "no records were found in SPIRES to match your search, please try again"
         return "no records were found in SPIRES to match your search, please try again"


     bibitem = bibitem[bibitem.find('<!-- START RESULTS -->'):]
     bibitem = bibitem[bibitem.find('<pre>')+5:bibitem.rfind('/pre>')-1]
    bibitem = bibitem[bibitem.find('%'):]
 
      
     #treat newlines correctly
     bibitem = bibitem[:bibitem.rfind('<!-- END RESULTS -->')+1]
     bibitem = bibitem.replace("<br>","\n").replace("<br />","\n").replace("&nbsp;"," ")
    bibitem = bibitem[:bibitem.rfind('%')+1]


     return bibitem
     return bibitem
Line 148: Line 157:


     title = data[data.find('title'):]
     title = data[data.find('title'):]
     title = title[title.find('\"')+1:]
     title = title[title.find('\"')+2:]
     title = title[:title.find('\"')]
     title = title[:title.find('\"')-1]


     return title
     return title
Line 161: Line 170:




def downloadeprint(ref,type,downloadPath):
def downloadeprint(ref,rType,downloadPath):
     downloadPath = os.path.expanduser(downloadPath)
     downloadPath = os.path.expanduser(downloadPath)
     if type == 'old-style eprint':
     if rType == 'old-style eprint':
         urllib.urlretrieve('http://arxiv.org/pdf/' + ref, downloadPath + ref.replace('/','-') + '.pdf')
         urllib.urlretrieve('http://arxiv.org/pdf/' + ref, downloadPath + ref.replace('/','-') + '.pdf')
     elif type == 'new-style eprint':
     elif rType == 'new-style eprint':
         urllib.urlretrieve('http://arxiv.org/pdf/' + ref, downloadPath + ref + '.pdf')
         urllib.urlretrieve('http://arxiv.org/pdf/' + ref, downloadPath + ref + '.pdf')


Line 192: Line 201:
     f=open(fileName, 'r')
     f=open(fileName, 'r')
     data = f.read()
     data = f.read()
    f.close()


     if data.find(r'\begin{thebibliography}') >= 0:
     if data.find(r'\begin{thebibliography}') >= 0:
Line 288: Line 298:
         citeOpt = 1
         citeOpt = 1


 
    print ref
     type, ref = findRefType(ref)
     rType, ref = findRefType(ref)
    print rType, ref


     if verboseOpt:
     if verboseOpt:
         print 'the reference ' + ref + ' is a(n) ' + type
         print 'the reference ' + ref + ' is a(n) ' + rType


     if (bibtexOpt + authorOpt + titleOpt + citeOpt + libraryOpt) > 0:
     if (bibtexOpt + authorOpt + titleOpt + citeOpt + libraryOpt) > 0:
         BiBTeX = getBiBTeX(ref,type)
         BiBTeX = getBiBTeX(ref,rType)
         if 'no records' in BiBTeX:
         if 'no records' in BiBTeX:
             print BiBTeX
             print BiBTeX
Line 304: Line 315:


     if bibitemOpt:
     if bibitemOpt:
         bibitem = getBibitem(ref,type)
         bibitem = getBibitem(ref,rType)
         print bibitem
         print bibitem
         if 'no records' in bibitem:
         if 'no records' in bibitem:
Line 327: Line 338:


     if downloadOpt:
     if downloadOpt:
         downloadeprint(ref,type,downloadPath)
         downloadeprint(ref,rType,downloadPath)
</pre>
</pre>

Latest revision as of 09:51, 31 March 2020

Copy the following text into a file called spires.py and save it.

Further usage instructions are available at SPIRES_script.

#! /usr/bin/python

## SPIRES script version 0.6

## updated for inSPIRE

## Copyright 2015 Tom Brown

## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 3 of the
## License, or (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.

## See http://www.stringwiki.org/wiki/SPIRES_script for more usage
## instructions

'''SPIRES script
Usage:
python spires.py reference [ -hbiatcev ] [ --help ] [ --library library.bib ] [ --download download_path/ ]
"reference" must be a standard arXiv reference, e.g. hep-th/9711200, 0705.0303, Maldacena:1997re or a SPIRES journal reference, e.g. CMPHA,43,199
Options:
-h, --help
displays this help message
-b
displays the BiBTeX entry
-i
displays the bibitem entry
-a
displays the author(s)
-t
displays the title
-c
displays the TeX citation key
-e
displays everything
-v
verbose mode

--download download_path/
for arXiv references downloads a pdf of the paper from the arXiv to the directory download_path/
--library library.bib
if it is not already in library.bib, appends the BiBTeX entry to library.bib; use at your own risk
'''

__version__ = "0.6"
__author__ = "Tom Brown"
__copyright__ = "Copyright 2015 Tom Brown, GNU GPL 3"


import sys, os, getopt, re, urllib



#\d is a decimal digit; \D is anything but a decimal digit
def findRefType(ref):
    '''Classify a reference string and normalise it for querying.

    Returns a tuple (rType, ref) where rType is one of
    'old-style eprint', 'new-style eprint', 'texkey' or 'journal', and
    ref is the reference with any "arxiv:" tag removed.  A bare
    seven-digit number is taken to be an old-style eprint and gets
    'hep-th/' put in front of it.  Anything that matches none of the
    patterns is reported as 'journal'.
    '''
    # Remove the "arxiv:" tag whatever its capitalisation.  The canonical
    # spelling is "arXiv:", which a case-sensitive replace leaves in place
    # so that the reference falls through to 'journal'.
    ref = re.sub(r'(?i)arxiv:', '', ref)

    if re.search(r'^[a-zA-Z\-\.]+/\d{7}$', ref):
        # archive/NNNNNNN, e.g. hep-th/9711200 or math.AG/0601001
        rType = 'old-style eprint'
    elif re.search(r'^\d{7}$', ref):
        # seven digits with no archive name: default to hep-th
        rType = 'old-style eprint'
        ref = 'hep-th/' + ref
    elif re.search(r'^\d{4}\.\d{4,5}$', ref):
        # YYMM.NNNN or YYMM.NNNNN
        rType = 'new-style eprint'
    elif re.search(r'^\D+:\d{4}[a-zA-Z]{2,3}$', ref):
        # Name:YYYYxx or Name:YYYYxxx, e.g. Maldacena:1997re
        rType = 'texkey'
    else:
        rType = 'journal'

    return rType, ref



def getBiBTeX(ref,rType):
    '''Fetch the BiBTeX entry for a reference from old.inspirehep.net.

    ref and rType are the normalised reference and its type as returned
    by findRefType.  Returns the text from the first '@' inside the
    <pre> section of the result page up to and including the last '}'.
    If rType is unknown, the page reports 'No records', or the page does
    not contain a recognisable entry, a message containing 'no records'
    is returned instead (callers test for that substring).
    '''
    notFound = "no records were found in SPIRES to match your search, please try again"

    if rType in ('old-style eprint', 'new-style eprint'):
        query = 'p=find+eprint+' + ref
    elif rType == 'texkey':
        # NOTE(review): 'texkey=' and 'j=' are carried over from the SPIRES
        # interface; confirm that the INSPIRE search endpoint honours them.
        query = 'texkey=' + ref
    elif rType == 'journal':
        query = 'j=' + ref
    else:
        return notFound

    # example queries:
    #http://inspirehep.net/search?p=hep-th%2F9711200&of=hx
    #http://inspirehep.net/search?p=1101.0121&of=hx
    response = urllib.urlopen('http://old.inspirehep.net/search?' + query + '&of=hx')
    try:
        page = response.read()
    finally:
        # always release the connection, even if the read fails
        response.close()

    if 'No records' in page:
        return notFound

    # find() returns -1 when a marker is missing; using that as a slice
    # index would silently produce garbage, so check each step.
    preStart = page.find('<pre>')
    if preStart < 0:
        return notFound
    entry = page[preStart:]

    entryStart = entry.find('@')
    if entryStart < 0:
        return notFound
    entry = entry[entryStart:]

    preEnd = entry.rfind('/pre>')
    if preEnd >= 0:
        entry = entry[:preEnd]

    lastBrace = entry.rfind('}')
    if lastBrace < 0:
        return notFound

    return entry[:lastBrace+1]



def getBibitem(ref,rType):
    '''Fetch the LaTeX bibitem for a reference from old.inspirehep.net.

    ref and rType are the normalised reference and its type as returned
    by findRefType.  Returns the contents of the <pre> section of the
    result page with HTML line breaks turned into newlines and
    non-breaking-space entities turned into spaces.  If rType is
    unknown, the page reports 'No records', or the page has no <pre>
    section, a message containing 'no records' is returned instead
    (callers test for that substring).
    '''
    notFound = "no records were found in SPIRES to match your search, please try again"

    if rType in ('old-style eprint', 'new-style eprint'):
        query = 'p=find+eprint+' + ref
    elif rType == 'texkey':
        # NOTE(review): 'texkey=' and 'j=' are carried over from the SPIRES
        # interface; confirm that the INSPIRE search endpoint honours them.
        query = 'texkey=' + ref
    elif rType == 'journal':
        query = 'j=' + ref
    else:
        return notFound

    response = urllib.urlopen('http://old.inspirehep.net/search?' + query + '&of=hlxe')
    try:
        page = response.read()
    finally:
        # always release the connection, even if the read fails
        response.close()

    if 'No records' in page:
        return notFound

    # find()/rfind() return -1 when a marker is missing; using that as a
    # slice index would return an arbitrary chunk of the page.
    preStart = page.find('<pre>')
    preEnd = page.rfind('/pre>')
    if preStart < 0 or preEnd < 0:
        return notFound

    # +5 skips the '<pre>' tag itself; -1 drops the '<' of '</pre>'
    bibitem = page[preStart+5:preEnd-1]

    #treat newlines correctly
    bibitem = bibitem.replace("<br>","\n").replace("<br />","\n").replace("&nbsp;"," ")

    return bibitem



def extractauthor(BiBTeX):
    '''Return the contents of the author field of a BiBTeX entry.

    The field is expected in the form  author = "...".  Runs of white
    space (including line breaks) are collapsed to single spaces.
    Backslash-escaped characters such as the accent in M\\"uller are
    kept as part of the value rather than being taken as the closing
    quote.  Returns '' if there is no such field.
    '''
    # remove excess white space and replace with a single space
    data = re.sub(r'\s+', r' ', BiBTeX)

    # the value is any run of non-quote characters or backslash escapes
    match = re.search(r'\bauthor\s*=\s*"((?:[^"\\]|\\.)*)"', data)
    if match is None:
        return ''

    return match.group(1)



def extracttitle(BiBTeX):
    '''Return the contents of the title field of a BiBTeX entry.

    The field is expected in the form  title = "{...}"  or
    title = "...".  Runs of white space (including line breaks) are
    collapsed to single spaces.  One pair of braces enclosing the whole
    title is removed if present.  A 'booktitle' field is not mistaken
    for the title, and backslash-escaped characters inside the value do
    not end it.  Returns '' if there is no such field.
    '''
    # remove excess white space and replace with a single space
    data = re.sub(r'\s+', r' ', BiBTeX)

    # \b keeps 'booktitle' from matching; the value is any run of
    # non-quote characters or backslash escapes
    match = re.search(r'\btitle\s*=\s*"((?:[^"\\]|\\.)*)"', data)
    if match is None:
        return ''
    title = match.group(1)

    # Strip the outer braces only when the opening brace is closed by the
    # final character, so that "{A} and {B}" is left alone.
    if title.startswith('{') and title.endswith('}'):
        depth = 0
        for position, char in enumerate(title):
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
                if depth == 0:
                    break
        if position == len(title) - 1:
            title = title[1:-1]

    return title



def extractcite(BiBTeX):
    '''Return the citation key of a BiBTeX entry.

    The key is the text between the first '{' and the first ',' of the
    entry, e.g. 'Maldacena:1997re' for '@article{Maldacena:1997re,...'.
    '''
    keyStart = BiBTeX.find('{') + 1
    keyEnd = BiBTeX.find(',')
    return BiBTeX[keyStart:keyEnd]



def downloadeprint(ref,rType,downloadPath):
    '''Download the pdf of an arXiv eprint into the directory downloadPath.

    ref and rType are the normalised reference and its type as returned
    by findRefType.  The file is saved as <ref>.pdf, with the '/' of an
    old-style reference replaced by '-'.  A leading '~' in downloadPath
    is expanded.  Nothing is done for references that are not eprints.
    '''
    if rType == 'old-style eprint':
        # '/' cannot appear in a file name
        fileName = ref.replace('/','-') + '.pdf'
    elif rType == 'new-style eprint':
        fileName = ref + '.pdf'
    else:
        return

    downloadPath = os.path.expanduser(downloadPath)
    # os.path.join inserts the separator only when it is missing, so the
    # directory may be given with or without a trailing '/'
    urllib.urlretrieve('http://arxiv.org/pdf/' + ref, os.path.join(downloadPath, fileName))



def updatelibrary(cite,BiBTeX,BiBTeXlibraryFileName):
    '''Append a BiBTeX entry to a library file unless it is already there.

    cite is the citation key, BiBTeX the full entry and
    BiBTeXlibraryFileName the path of the library (a leading '~' is
    expanded).  The entry counts as present if cite occurs anywhere in
    the file.  A library file that does not exist yet is treated as
    empty and is created by the append.  A one-line status message is
    printed either way.
    '''
    BiBTeXlibraryFileName = os.path.expanduser(BiBTeXlibraryFileName)

    # a missing library is simply an empty one; opening it for reading
    # would raise instead of letting the first entry create the file
    if os.path.isfile(BiBTeXlibraryFileName):
        with open(BiBTeXlibraryFileName, 'r') as libraryfile:
            library = libraryfile.read()
    else:
        library = ''

    if cite in library:
        print('BiBTeX entry already in library')
        return

    print('adding BiBTeX entry to ' + BiBTeXlibraryFileName)
    # mode 'a' writes at the end of the file
    with open(BiBTeXlibraryFileName, 'a') as libraryfile:
        libraryfile.write('\n' + BiBTeX + '\n')



def listCitations(fileName):
    '''List the citation keys used in a LaTeX file.

    Reads fileName (a leading '~' is expanded) and returns the keys of
    every \\cite{...} command found before \\begin{thebibliography}, in
    order of first appearance and without duplicates.  Comma-separated
    keys are split, white space around a key is dropped, empty keys are
    skipped, and optional arguments as in \\cite[p.~3]{key} are allowed.
    '''
    fileName = os.path.expanduser(fileName)
    with open(fileName, 'r') as f:
        data = f.read()

    # ignore anything from the bibliography onwards
    bibliographyStart = data.find(r'\begin{thebibliography}')
    if bibliographyStart >= 0:
        data = data[:bibliographyStart]

    citations = []

    # group 1 is everything between '{' and the first following '}'
    for keyList in re.findall(r'\\cite(?:\[[^\]]*\])*\{([^}]*)\}', data):
        for key in keyList.split(','):
            key = key.strip()
            if key and key not in citations:
                citations.append(key)

    return citations



if __name__ == "__main__":

    # Command-line driver: parse the options, classify the reference,
    # fetch what was asked for and print it.
    # Exit status: 0 on success (and for -h), 1 when no record was found,
    # 2 for a usage error.

    # one flag per kind of output / action; set from the options below
    authorOpt = 0
    titleOpt = 0
    bibtexOpt = 0
    bibitemOpt = 0
    citeOpt = 0
    verboseOpt = 0
    libraryOpt = 0
    downloadOpt = 0

    try:
        # gnu_getopt lets options appear after the reference argument
        options, arguments = getopt.gnu_getopt(sys.argv[1:], 
        'hbiatcev', ['help','library=','download='])
    except getopt.error:
        print('error: you tried to use an unknown option or the argument for an option that requires it was missing; try \'spires.py -h\' for more information')
        sys.exit(2)

    for o,a in options:
        if o in  ('-h','--help'):
            print(__doc__)
            sys.exit(0)

        elif o == '--library':
            if a == '':
                print('--library expects an argument')
                sys.exit(2)
            libraryOpt = 1
            BiBTeXlibraryFileName = os.path.expanduser(a)
            print('library file name is ' + BiBTeXlibraryFileName)

        elif o == '--download':
            downloadOpt = 1
            # default to the current directory and make sure the path
            # ends with a separator
            if a == '':
                a = './'
            elif a[-1] != '/':
                a = a + '/'
            downloadPath = os.path.expanduser(a)
            print('download path is ' + downloadPath)

        elif o == '-b':
            bibtexOpt = 1

        elif o == '-i':
            bibitemOpt = 1

        elif o == '-a':
            authorOpt = 1

        elif o == '-t':
            titleOpt = 1

        elif o == '-c':
            citeOpt = 1

        elif o == '-e':
            bibtexOpt = 1
            authorOpt = 1
            titleOpt = 1
            citeOpt = 1

        elif o == '-v':
            verboseOpt = 1

    if len(arguments) != 1:
        print('you didn\'t specify a SPIRES reference; try \'spires.py -h\' for more information')
        sys.exit(2)
    else:
        ref=arguments[0]

    # with no options at all behave like -e
    if len(options) == 0:
        bibtexOpt = 1
        authorOpt = 1
        titleOpt = 1
        citeOpt = 1

    # NOTE(review): these two unconditional prints look like leftover
    # debugging output (the -v message below reports the same thing);
    # they are kept so that the script's output is unchanged -- confirm
    # whether they can be removed.
    print(ref)
    rType, ref = findRefType(ref)
    print(rType + ' ' + ref)

    if verboseOpt:
        print('the reference ' + ref + ' is a(n) ' + rType)

    # the BiBTeX entry is the source for the author, title and cite key,
    # so fetch it once if any of them is needed
    if (bibtexOpt + authorOpt + titleOpt + citeOpt + libraryOpt) > 0:
        BiBTeX = getBiBTeX(ref,rType)
        if 'no records' in BiBTeX:
            print(BiBTeX)
            sys.exit(1)

    if bibtexOpt:
        print(BiBTeX)

    if bibitemOpt:
        bibitem = getBibitem(ref,rType)
        # on failure bibitem holds the 'no records' message, so print it
        # before exiting
        print(bibitem)
        if 'no records' in bibitem:
            sys.exit(1)

    if authorOpt:
        author = extractauthor(BiBTeX)
        print(author)

    if titleOpt:
        title = extracttitle(BiBTeX)
        print(title)

    if (citeOpt + libraryOpt) > 0:
        cite = extractcite(BiBTeX)

    if citeOpt:
        print('\\cite{' + cite + '}')

    if libraryOpt:
        updatelibrary(cite,BiBTeX,BiBTeXlibraryFileName)

    if downloadOpt:
        downloadeprint(ref,rType,downloadPath)