Source code for scripts.mrtools
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Small command line tools for accessing bibliographic data from 'MathSciNet'.
"""
import requests
import yaml
import click
import listb.mrtools as mrtools
def chunk_list(l, n):
    """ Chops a list into tuples (chunks) of maximal size `n`

    Consecutive elements are grouped in their original order; only the
    last chunk may hold fewer than `n` elements. An empty input yields
    an empty list (no empty chunk is produced).

    Args:
        l (Sequence[Any]): the list to be resized; anything supporting
            ``len()`` and slicing works
        n (int): maximal size of the chunks, must be positive

    Returns:
        List[Tuple[Any, ...]]: the chunks, in order

    Raises:
        ValueError: if `n` is not positive

    Example:
        >>> chunk_list([1, 2, 3, 4, 5], 2)
        [(1, 2), (3, 4), (5,)]
    """
    # Validate explicitly rather than with ``assert``: assertions are
    # removed under ``python -O`` and n == 0 would then fail obscurely.
    if n <= 0:
        raise ValueError('chunk size n must be positive, got %r' % (n,))
    # Stepping through the start offsets handles the short tail chunk and
    # the "n larger than the list" case without any special-casing.
    return [tuple(l[start:start + n]) for start in range(0, len(l), n)]
# Root command group; the subcommands are attached to it at module level.
@click.group()
def cli():
    """ Small command line tool for crawling search pages on
    MathSciNet, requesting MR-numbers and obtaining BibTex-databases
    """
@click.command('crawl',
               short_help='Prints the URL and all succeeding URLs.')
@click.option('--url',
              prompt='Please enter URL',
              help='URL pointing to MathSciNet search result')
def crawl(url):
    """ Prints the URL and all succeeding URLs.
    If the search result is split into 5 pages and the URL to page
    3 is passed then the URLs of pages 3, 4, and 5 are printed.
    """
    # mrtools.crawl returns a pair; only its second item (the URLs) is
    # needed here, the first one is discarded.
    _, urls = mrtools.crawl(url)
    click.echo('\n'.join(urls))
@click.command('mrnumbers',
               short_help='Prints the MR-numbers of the entries')
@click.option('--url',
              prompt='Please enter URL',
              help='URL pointing to MathSciNet search result')
@click.option('--crawl/--no-crawl',
              default=False,
              help='Crawl page and return all MR-numbers')
@click.option('--dump',
              type=click.File('w'),
              help='path to yaml file for output')
def mrnumbers(url, crawl, dump):
    """ Prints the MR-numbers of the entries.
    With `--crawl` every page returned by the crawler is evaluated,
    otherwise only the page behind the URL. With `--dump` the numbers
    are written to the given yaml file instead of being printed.
    """
    if crawl:
        # Only the first item of the crawl result (the page contents) is
        # parsed below; the second item is not needed.
        sites, _ = mrtools.crawl(url)
    else:
        try:
            # A timeout keeps the command from hanging forever on a stalled
            # connection; raise_for_status stops an HTTP error page from
            # being parsed as if it were a search result.
            req = requests.get(url, timeout=30)
            req.raise_for_status()
        except requests.RequestException as err:
            raise click.ClickException('Could not fetch %s: %s' % (url, err))
        sites = [req.text]
    # Parse every page and flatten the per-page results into one list.
    mmrn = [n for s in sites for n in mrtools.msn_to_mrnumbers(s)]
    if dump:
        mrtools.yaml_dumps(mmrn, dump)
    else:
        click.echo('\n'.join(mmrn))
@click.command('bib',
               short_help='Retrieves BibTeX file for MR-numbers')
@click.option('--load',
              type=click.File('r'),
              help='path to yaml file storing the MR-numbers')
@click.option('--dump',
              type=click.File('w'),
              help='Path to BibTeX file for output')
@click.argument('mrnumbers',
                nargs=-1)
def bib(load, dump, mrnumbers):
    """ Fetches BibTeX entries for MR-numbers from MathSciNet.
    If both `--load` and `mrnumbers` are specified, only the numbers
    stored in the yaml file are used.
    """
    if load:
        # safe_load instead of yaml.load: the bare call without a Loader
        # raises TypeError on PyYAML >= 6 and could construct arbitrary
        # Python objects from the file on older versions.
        mrnumbers = yaml.safe_load(load)
    # Covers both "nothing given at all" and an empty yaml file (which
    # loads as None and would otherwise crash while chunking).
    if not mrnumbers:
        raise click.UsageError('Please specify yaml file or mrnumbers.')
    # The numbers are requested in batches of at most 20 per call.
    chunks = chunk_list(mrnumbers, 20)
    with click.progressbar(chunks) as bar:
        bibs = [mrtools.get_bibtex_from_msn(c) for c in bar]
    # Report every chunk whose request produced nothing usable. str() keeps
    # the message buildable when the yaml file stored the numbers as ints.
    err_chunks = [', '.join(str(number) for number in chunk)
                  for chunk, entry in zip(chunks, bibs) if not entry]
    if err_chunks:
        raise click.UsageError('There seems to be something wrong with '
                               'at least one of the following MR-numbers.'
                               '\n\n%s' % '\n'.join(err_chunks))
    output = '\n'.join(bibs)
    if dump:
        dump.write(output)
    else:
        click.echo(output)
# Attach the subcommands to the root group, in the same order as before.
for _subcommand in (crawl, mrnumbers, bib):
    cli.add_command(_subcommand)

# Run the command group only when the module is executed as a script.
if __name__ == '__main__':
    cli()