Skip to content

Commit

Permalink
Improve logging: standard formatting, verbose tests (#132)
Browse files: browse the repository at this point in the history
+ Should decrease test verbosity when instability is resolved.
+ Consider demoting more log lines to DEBUG.
  • Loading branch information
lukasschwab authored Oct 16, 2023
1 parent 821cca9 commit 849381d
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 16 deletions.
30 changes: 14 additions & 16 deletions arxiv/arxiv.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -577,12 +577,7 @@ def results(self, search: Search, offset: int = 0) -> Generator[Result, None, No
first_page = True
while offset < total_results:
page_size = min(self.page_size, search.max_results - offset)
logger.info(
"Requesting {} results at offset {}".format(
page_size,
offset,
)
)
logger.info("Requesting %d results at offset %d", page_size, offset)
page_url = self._format_url(search, offset, page_size)
feed = self._parse_feed(page_url, first_page)
if first_page:
Expand All @@ -591,16 +586,18 @@ def results(self, search: Search, offset: int = 0) -> Generator[Result, None, No
# bug is fixed, we can remove this conditional and always set
# `total_results = min(...)`.
if len(feed.entries) == 0:
logger.info("Got empty results; stopping generation")
logger.info("Got empty first page; stopping generation")
total_results = 0
else:
total_results = min(
total_results, int(feed.feed.opensearch_totalresults)
)
logger.info(
"Got first page; {} of {} results available".format(
total_results, search.max_results
)
"Got first page: %d of %d total results",
total_results,
search.max_results
if search.max_results != float("inf")
else -1,
)
# Subsequent pages are not the first page.
first_page = False
Expand All @@ -610,8 +607,8 @@ def results(self, search: Search, offset: int = 0) -> Generator[Result, None, No
for entry in feed.entries:
try:
yield Result._from_feed_entry(entry)
except Result.MissingFieldError:
logger.warning("Skipping partial result")
except Result.MissingFieldError as e:
logger.warning("Skipping partial result: %s", e)
continue

def _format_url(self, search: Search, start: int, page_size: int) -> str:
Expand Down Expand Up @@ -661,14 +658,14 @@ def __try_parse_feed(
since_last_request = datetime.now() - self._last_request_dt
if since_last_request < required:
to_sleep = (required - since_last_request).total_seconds()
logger.info("Sleeping for %f seconds", to_sleep)
logger.info("Sleeping: %f seconds", to_sleep)
time.sleep(to_sleep)
logger.info(
"Requesting page of results",
"Requesting page (try %d): %s",
retry,
url,
extra={
"url": url,
"first_page": first_page,
"retry": retry,
"last_err": last_err.message if last_err is not None else None,
},
)
Expand All @@ -680,6 +677,7 @@ def __try_parse_feed(
elif len(feed.entries) == 0 and not first_page:
err = UnexpectedEmptyPageError(url, retry)
if err is not None:
logger.debug("Got error (try %d): %s", retry, err)
if retries_left > 0:
return self.__try_parse_feed(
url,
Expand Down
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,3 +3,6 @@ description_file = README.md

[tool:pytest]
addopts = --verbose
log_cli = True
log_cli_level = INFO

0 comments on commit 849381d

Please sign in to comment.