from functools import lru_cache
from urllib.error import HTTPError, URLError
from pybtex.database import Entry
def cleanup_string(string):
    """
    Remove BibTeX markup characters from ``string``.

    Every opening brace, closing brace and backslash is dropped; all
    other characters are returned unchanged.
    """
    for unwanted in ('{', '}', '\\'):
        string = string.replace(unwanted, '')
    return string
def recurse_bibtex(obj, entries):
    """
    Collect ``BIBTEX_ENTRIES`` from every ``Citable`` ancestor of ``obj``.

    Parameters
    ----------
    obj : object or type
        Instance or class whose base classes are walked recursively.
    entries : list
        Extended in place with the ``BIBTEX_ENTRIES`` of each base class
        that is a subclass of ``Citable``. Duplicates are not removed
        here.
    """
    # The recursive call below passes a class. For a class,
    # ``obj.__class__`` is its metaclass, whose bases are not the
    # ancestors we want, so use the class itself in that case.
    cls = obj if isinstance(obj, type) else obj.__class__
    for base in cls.__bases__:
        if issubclass(base, Citable):
            entries.extend(base.BIBTEX_ENTRIES)
            recurse_bibtex(base, entries)
def stringify_people(authors):
    """
    Render an iterable of people as one comma-separated string.

    Each person is converted with ``str`` and passed through
    ``cleanup_string`` before the names are joined with ``', '``.
    """
    names = [cleanup_string(str(person)) for person in authors]
    return ', '.join(names)
def unique_citations_only(citations):
    """
    Return the citations with duplicates removed, keeping first-seen order.

    Membership is tested with ``in`` on a list (equality based), so the
    items do not need to be hashable.
    """
    unique = []
    for citation in citations:
        if citation in unique:
            continue
        unique.append(citation)
    return unique
def to_bibtex(citations):
    """
    Serialise ``citations`` into a single BibTeX-formatted string.

    Each citation is stored under a key made of the first eight
    characters of a freshly generated UUID4, so the keys differ from one
    call to the next.
    """
    from uuid import uuid4
    from pybtex.database import BibliographyData

    keyed = {}
    for citation in citations:
        keyed[str(uuid4())[:8]] = citation
    return BibliographyData(entries=keyed).to_string('bibtex')
def handle_publication(fields):
    """
    Build a one-line publication summary from BibTeX ``fields``.

    The summary is made of, in order: the venue (``journal``, else
    ``booktitle``, else ``archivePrefix``), the locator (``volume``,
    else ``eprint``), then ``pages``, ``month`` and ``year``. Missing
    parts are skipped, each value is passed through ``cleanup_string``,
    and the parts are joined with ``', '``.
    """
    # Each tuple lists alternatives in priority order; only the first
    # key that is present contributes to the summary.
    alternatives = (
        ('journal', 'booktitle', 'archivePrefix'),
        ('volume', 'eprint'),
        ('pages',),
        ('month',),
        ('year',),
    )
    parts = []
    for candidates in alternatives:
        chosen = next((key for key in candidates if key in fields), None)
        if chosen is not None:
            parts.append(cleanup_string(fields[chosen]))
    return ', '.join(parts)
def construct_nice_printable_string(entry, indent=0):
    """
    Format a citation as human-readable, newline-terminated lines.

    A plain string ``entry`` yields a single unindented
    ``Found non bibtex citation`` line. Otherwise the output has the
    title (if present), the authors (if present) and the publication
    summary, each on its own line prefixed with ``indent`` tab
    characters.
    """
    prefix = '\t' * indent
    if isinstance(entry, str):
        return f'Found non bibtex citation: {entry}\n'

    lines = []
    if 'title' in entry.fields:
        lines.append(cleanup_string(entry.fields['title']))
    persons = entry.persons
    if 'author' in persons:
        lines.append(stringify_people(persons['author']))
    # The publication line is always emitted, even when it is empty.
    lines.append(handle_publication(entry.fields))
    return ''.join(f'{prefix}{line}\n' for line in lines)
class Citable:
    """
    Defines a class that contains citation
    information.
    """

    BIBTEX_ENTRIES = []
    """
    List of BibTeX entries
    """

    def citations(self):
        """
        Return the parsed, de-duplicated citations for this object.

        Starts from a copy of ``self.BIBTEX_ENTRIES``, lets
        ``recurse_bibtex`` append the entries of the base classes,
        parses every entry with ``Entry.from_string(..., 'bibtex')`` and
        drops duplicates with ``unique_citations_only``.
        """
        # Copy so that the class-level list is never mutated.
        entries = self.BIBTEX_ENTRIES[:]
        recurse_bibtex(self, entries)
        all_citations = [Entry.from_string(b, 'bibtex') for b in entries]
        return unique_citations_only(all_citations)

    def nice_citation(self, prefix='', start_idx=0, indent=0):
        """
        Return all citations formatted as a printable string.

        Parameters
        ----------
        prefix : str
            Accepted for interface compatibility; not used here.
        start_idx : int
            Accepted for interface compatibility; not used here.
        indent : int
            Number of tab characters placed in front of every line of
            every formatted citation.

        Returns
        -------
        str
            The formatted citations joined with newlines, or an empty
            string when there are no citations.
        """
        entries = self.citations()
        if not entries:
            return ''
        # Forward ``indent``; it was previously accepted but ignored.
        return '\n'.join(construct_nice_printable_string(e, indent)
                         for e in entries)
@lru_cache(maxsize=100)
def doi_to_bibtex(doi):
    """
    Fetch the BibTeX record for a DOI over HTTP.

    Parameters
    ----------
    doi : str
        DOI appended to ``http://dx.doi.org/`` to form the request URL.

    Returns
    -------
    str or None
        The decoded response body, or ``None`` when the request raises
        ``HTTPError`` (a message is printed) or ``URLError``.

    Notes
    -----
    Results are memoised by ``lru_cache`` for up to 100 distinct DOIs.
    A ``None`` returned after a failed request is cached as well.
    """
    # ``import urllib`` alone does not load the ``request`` submodule;
    # import it explicitly so the attribute access below cannot fail.
    import urllib.request

    BASE_URL = 'http://dx.doi.org/'
    req = urllib.request.Request(BASE_URL + doi)
    # Ask the resolver for BibTeX instead of the landing page.
    req.add_header('Accept', 'application/x-bibtex')
    try:
        with urllib.request.urlopen(req) as f:
            return f.read().decode()
    except HTTPError as e:
        # HTTPError subclasses URLError, so it must be handled first.
        if e.code == 404:
            print('DOI not found.')
        else:
            print('Service unavailable.')
        return None
    except URLError:
        return None