Skip to content

Commit

Permalink
fix: Avoid making unnecessary datatracker lookups (#457)
Browse files Browse the repository at this point in the history
Fixes #452
  • Loading branch information
kesara authored Aug 15, 2024
1 parent 3cf8ab9 commit db2081d
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 75 deletions.
152 changes: 78 additions & 74 deletions bibxml/xml2rfc_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,82 +231,86 @@ def resolve(self) -> BibliographicItem:
"lacks I-D version", self.anchor)
indexed_version = None

# Check Datatracker’s latest version (slow)
try:
dt_bibitem = get_internet_draft(
f'draft-{self.bare_anchor}',
strict=indexed_bibitem is None,
).bibitem

if len(dt_bibitem.version or []) > 0:
dt_version = dt_bibitem.version[0].draft
if not isinstance(dt_version, str):
raise ValueError(
"Malformed I-D version (not a string): "
f"{dt_version}")
try:
parsed_version = int(dt_version)
except (ValueError, TypeError):
raise ValueError(
"Malformed I-D version (doesn’t parse to an integer): "
f"{dt_version}")
else:
if parsed_version < 0:
dt_version: Optional[str] = None
dt_bibitem: Optional[BibliographicItem] = None

if not (self.requested_version and self.requested_version == indexed_version):
# Check Datatracker’s latest version (slow)
# when request is unversioned or requested version is not indexed
try:
dt_bibitem = get_internet_draft(
f'draft-{self.bare_anchor}',
strict=indexed_bibitem is None,
).bibitem

if dt_bibitem and dt_bibitem.version and len(dt_bibitem.version) > 0:
dt_version = dt_bibitem.version[0].draft
if not isinstance(dt_version, str):
raise ValueError(
"Malformed I-D version (not a positive integer): "
"Malformed I-D version (not a string): "
f"{dt_version}")
else:
raise ValueError("Missing I-D version")

except Exception:
log.exception(
"Failed to fetch or validate latest draft from Datatracker "
"when resolving xml2rfc bibxml3 path")
else:
# Conditions for falling back to Datatracker’s response.
# We want to prefer indexed items in general, because they tend to
# provide more complete data, but in some cases we have no choice
# but to fall back.
if any([
# We were not requested a version
not self.requested_version,
# We were requested a version and we got that version
# from Datatracker
self.requested_version == dt_version,
]) and any([
# We did not find indexed item matching given ID
# and maybe version:
not indexed_bibitem,
# We were not requested a version,
# and latest version on Datatracker is different
# (assuming newer):
not self.requested_version
and indexed_version != dt_version,
# We were requested a version,
# and somehow indexed version does not match requested version:
self.requested_version
and indexed_version != self.requested_version,
]):
# Datatracker’s version is newer
# or we don’t have this draft indexed.
# Note this (should be transient until sources are reindexed,
# if not then there’s a problem)
# and return Datatracker’s version
self.log(f"returning Datatracker version {dt_version}")
log.warn(
"Returning Datatracker result for xml2rfc bibxml3 path. "
"If unversioned I-D was requested, "
"then Datatracker may have a newer I-D version "
"than indexed sources. "
"Alternatively, indexed version could not be used "
"for some reason. "
"Requested version %s, "
"indexed sources have version %s, "
"returning Datatracker’s version %s. ",
self.requested_version,
indexed_version,
dt_version)
self.resolved_item = dt_bibitem
try:
parsed_version = int(dt_version)
except (ValueError, TypeError):
raise ValueError(
"Malformed I-D version (doesn’t parse to an integer): "
f"{dt_version}")
else:
if parsed_version < 0:
raise ValueError(
"Malformed I-D version (not a positive integer): "
f"{dt_version}")
else:
raise ValueError("Missing I-D version")

except Exception:
log.exception(
"Failed to fetch or validate latest draft from Datatracker "
"when resolving xml2rfc bibxml3 path")
# Conditions for falling back to Datatracker’s response.
# We want to prefer indexed items in general, because they tend to
# provide more complete data, but in some cases we have no choice
# but to fall back.
if any([
# We were not requested a version
not self.requested_version,
# We were requested a version and we got that version
# from Datatracker
self.requested_version == dt_version,
]) and any([
# We did not find indexed item matching given ID
# and maybe version:
not indexed_bibitem,
# We were not requested a version,
# and latest version on Datatracker is different
# (assuming newer):
not self.requested_version
and indexed_version != dt_version,
# We were requested a version,
# and somehow indexed version does not match requested version:
self.requested_version
and indexed_version != self.requested_version,
]):
# Datatracker’s version is newer
# or we don’t have this draft indexed.
# Note this (should be transient until sources are reindexed,
# if not then there’s a problem)
# and return Datatracker’s version
self.log(f"returning Datatracker version {dt_version}")
log.warn(
"Returning Datatracker result for xml2rfc bibxml3 path. "
"If unversioned I-D was requested, "
"then Datatracker may have a newer I-D version "
"than indexed sources. "
"Alternatively, indexed version could not be used "
"for some reason. "
"Requested version %s, "
"indexed sources have version %s, "
"returning Datatracker’s version %s. ",
self.requested_version,
indexed_version,
dt_version)
self.resolved_item = dt_bibitem

if not self.resolved_item:
if indexed_bibitem and any([
Expand Down
3 changes: 2 additions & 1 deletion xml2rfc_compat/fixtures/test_refdata.json
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,7 @@
"link": [
{
"type": "TXT",
"content": "https://www.ietf.org/archive/id/draft-ietf-hip-rfc5201-bis-13.txt"
"content": "https://www.example.org/versioned-13.txt"
}
],
"type": "standard",
Expand Down Expand Up @@ -697,6 +697,7 @@
"language": [
"en"
],
"version": "13",
"docnumber": "I-D.ietf-hip-rfc5201-bis",
"contributor": [
{
Expand Down
19 changes: 19 additions & 0 deletions xml2rfc_compat/tests/test_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def setUp(self):
self.rfcs_ref = "RFC.4037"
self.misc_ref = "FIPS.180.1993"
self.internet_drafts_ref = "I-D.ietf-hip-rfc5201-bis"
self.internet_drafts_versioned_ref = "I-D.draft-ietf-hip-rfc5201-bis-13"
self.w3c_ref = "rec-powder-grouping-20090901"
self.threegpp_ref = "SDO-3GPP.25.321:Rel-8/8.3.0"
self.ieee_ref = "R.IEEE.P2740/D-6.5.2020-08"
Expand Down Expand Up @@ -68,6 +69,24 @@ def test_internet_drafts(self):
bibitem = adapter.resolve()
self._assert_is_instance_of_bibliographicitem(bibitem)
self._assert_refs_equal(bibitem, self.internet_drafts_ref)
self.assertTrue(
all(
# provides latest link from the datatracker
link.content.startswith("https://datatracker.ietf.org/")
for link in as_list(bibitem.link or [])
)
)

def test_internet_drafts_versioned(self):
adapter = InternetDraftsAdapter(self.dirname, "bibxml3", self.internet_drafts_versioned_ref)
bibitem = adapter.resolve()
self.assertTrue(
all(
# provides link from the fixtures
link.content == "https://www.example.org/versioned-13.txt"
for link in as_list(bibitem.link or [])
)
)

def test_internet_drafts_not_found(self):
adapter = InternetDraftsAdapter(self.dirname, "bibxml3", self.internet_drafts_ref.replace("b", ""))
Expand Down

0 comments on commit db2081d

Please sign in to comment.