Skip to content

Commit

Permalink
Fix Span.sents for edge case of Span being the only Span in the last …
Browse files Browse the repository at this point in the history
…sentence of a Doc. (explosion#12484)
  • Loading branch information
rmitsch authored and adrianeboyd committed Apr 3, 2023
1 parent 26da226 commit 8d06487
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 0 deletions.
15 changes: 15 additions & 0 deletions spacy/tests/doc/test_span.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,3 +716,18 @@ def test_for_partial_ent_sents():
# equal to the sentences referenced in ent.sents.
for doc_sent, ent_sent in zip(doc.sents, doc.ents[0].sents):
assert doc_sent == ent_sent


def test_for_no_ent_sents():
    """Check Span.sents for an entity that sits alone in the Doc's last sentence.

    The Doc has five tokens with sentence starts on token 0 and token 4, so the
    final sentence consists solely of the "ENTITY" token. The entity span that
    covers this token must report exactly one sentence, and both that sentence
    and Span.sent must render as "ENTITY".
    """
    words = ["This", "is", "a", "test.", "ENTITY"]
    sent_starts = [1, 0, 0, 0, 1]
    doc = Doc(English().vocab, words=words, sent_starts=sent_starts)
    doc.set_ents([Span(doc, 4, 5, "WORK")])

    entity = doc.ents[0]
    entity_sents = list(entity.sents)

    # Exactly one sentence, and it agrees with the single-sentence accessor.
    assert len(entity_sents) == 1
    assert str(entity_sents[0]) == str(entity.sent) == "ENTITY"
4 changes: 4 additions & 0 deletions spacy/tokens/span.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,10 @@ cdef class Span:
elif i == self.doc.length - 1:
yield Span(self.doc, start, self.doc.length)

# Ensure that a trailing sentence starting on the Doc's final token is still yielded as the last element of .sents.
if start == self.doc.length - 1:
yield Span(self.doc, start, self.doc.length)

@property
def ents(self):
"""The named entities that fall completely within the span. Returns
Expand Down

0 comments on commit 8d06487

Please sign in to comment.