diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py index c6195d7e2ac..10700b7874a 100644 --- a/spacy/tests/doc/test_doc_api.py +++ b/spacy/tests/doc/test_doc_api.py @@ -567,6 +567,7 @@ def test_doc_api_from_docs(en_tokenizer, de_tokenizer): "Merging the docs is fun.", "", "They don't think alike. ", + "", "Another doc.", ] en_texts_without_empty = [t for t in en_texts if len(t)] @@ -574,9 +575,9 @@ def test_doc_api_from_docs(en_tokenizer, de_tokenizer): en_docs = [en_tokenizer(text) for text in en_texts] en_docs[0].spans["group"] = [en_docs[0][1:4]] en_docs[2].spans["group"] = [en_docs[2][1:4]] - en_docs[3].spans["group"] = [en_docs[3][0:1]] + en_docs[4].spans["group"] = [en_docs[4][0:1]] span_group_texts = sorted( - [en_docs[0][1:4].text, en_docs[2][1:4].text, en_docs[3][0:1].text] + [en_docs[0][1:4].text, en_docs[2][1:4].text, en_docs[4][0:1].text] ) de_doc = de_tokenizer(de_text) Token.set_extension("is_ambiguous", default=False) diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 362a1778455..2f82a0d1bdf 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -1183,7 +1183,7 @@ cdef class Doc: token_offset = -1 for doc in docs[:-1]: token_offset += len(doc) - if not (len(doc) > 0 and doc[-1].is_space): + if len(doc) > 0 and not doc[-1].is_space: concat_spaces[token_offset] = True concat_array = numpy.concatenate(arrays)