Skip to content

Commit

Permalink
Support Any comparisons for Token and Span (#13058)
Browse files Browse the repository at this point in the history
* Support Any comparisons for Token and Span

* Preserve previous behavior for None
  • Loading branch information
adrianeboyd authored Oct 12, 2023
1 parent d72029d commit ea1befa
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 6 deletions.
9 changes: 9 additions & 0 deletions spacy/tests/doc/test_span.py
Original file line number Diff line number Diff line change
Expand Up @@ -731,3 +731,12 @@ def test_for_no_ent_sents():
sents = list(doc.ents[0].sents)
assert len(sents) == 1
assert str(sents[0]) == str(doc.ents[0].sent) == "ENTITY"


def test_span_api_richcmp_other(en_tokenizer):
    """Check that `==` between a span slice and unrelated objects is falsy.

    The slice ``[1:2]`` of the first doc is compared against a single item
    of the same doc, a single item and a slice of a second doc, and finally
    a slice is compared against the second doc itself. None of these
    comparisons may raise, and none may evaluate as equal.
    """
    first_doc = en_tokenizer("a b")
    second_doc = en_tokenizer("b c")
    span = first_doc[1:2]
    # Slice vs. single item of the same doc.
    assert not span == first_doc[1]
    # Slice vs. single item of the other doc.
    assert not span == second_doc[0]
    # Slice vs. slice of the other doc.
    assert not span == second_doc[0:1]
    # Slice vs. the other doc as a whole.
    assert not first_doc[0:1] == second_doc
9 changes: 9 additions & 0 deletions spacy/tests/doc/test_token_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,3 +294,12 @@ def test_missing_head_dep(en_vocab):
assert aligned_heads[0] == ref_heads[0]
assert aligned_deps[5] == ref_deps[5]
assert aligned_heads[5] == ref_heads[5]


def test_token_api_richcmp_other(en_tokenizer):
    """Check that `==` between a single token and unrelated objects is falsy.

    Item ``[1]`` of the first doc is compared against a slice of the same
    doc, a slice and a single item of a second doc, and finally item ``[0]``
    is compared against the second doc itself. None of these comparisons may
    raise, and none may evaluate as equal.
    """
    first_doc = en_tokenizer("a b")
    second_doc = en_tokenizer("b c")
    token = first_doc[1]
    # Single item vs. slice of the same doc.
    assert not token == first_doc[0:1]
    # Single item vs. slice of the other doc.
    assert not token == second_doc[1:2]
    # Single item vs. single item of the other doc.
    assert not token == second_doc[0]
    # Single item vs. the other doc as a whole.
    assert not first_doc[0] == second_doc
7 changes: 5 additions & 2 deletions spacy/tokens/span.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -127,14 +127,17 @@ cdef class Span:
self._vector = vector
self._vector_norm = vector_norm

def __richcmp__(self, Span other, int op):
def __richcmp__(self, object other, int op):
if other is None:
if op == 0 or op == 1 or op == 2:
return False
else:
return True
if not isinstance(other, Span):
return False
cdef Span other_span = other
self_tuple = (self.c.start_char, self.c.end_char, self.c.label, self.c.kb_id, self.id, self.doc)
other_tuple = (other.c.start_char, other.c.end_char, other.c.label, other.c.kb_id, other.id, other.doc)
other_tuple = (other_span.c.start_char, other_span.c.end_char, other_span.c.label, other_span.c.kb_id, other_span.id, other_span.doc)
# <
if op == 0:
return self_tuple < other_tuple
Expand Down
7 changes: 6 additions & 1 deletion spacy/tokens/token.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,12 @@ class Token:
def __bytes__(self) -> bytes: ...
def __str__(self) -> str: ...
def __repr__(self) -> str: ...
def __richcmp__(self, other: Token, op: int) -> bool: ...
# Rich comparison operators. `other` is annotated as Any, so comparing
# against an object of an arbitrary type is accepted by type checkers.
def __lt__(self, other: Any) -> bool: ...
def __le__(self, other: Any) -> bool: ...
def __eq__(self, other: Any) -> bool: ...
def __ne__(self, other: Any) -> bool: ...
def __gt__(self, other: Any) -> bool: ...
def __ge__(self, other: Any) -> bool: ...
@property
def _(self) -> Underscore: ...
def nbor(self, i: int = ...) -> Token: ...
Expand Down
9 changes: 6 additions & 3 deletions spacy/tokens/token.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -139,17 +139,20 @@ cdef class Token:
def __repr__(self):
return self.__str__()

def __richcmp__(self, Token other, int op):
def __richcmp__(self, object other, int op):
# http://cython.readthedocs.io/en/latest/src/userguide/special_methods.html
if other is None:
if op in (0, 1, 2):
return False
else:
return True
if not isinstance(other, Token):
return False
cdef Token other_token = other
cdef Doc my_doc = self.doc
cdef Doc other_doc = other.doc
cdef Doc other_doc = other_token.doc
my = self.idx
their = other.idx
their = other_token.idx
if op == 0:
return my < their
elif op == 2:
Expand Down

0 comments on commit ea1befa

Please sign in to comment.