Skip to content

Commit

Permalink
chore: avoid overwriting pre-existing entities tomaarsen#17
Browse files Browse the repository at this point in the history
  • Loading branch information
davidberenstein1957 committed Jul 12, 2023
1 parent 91b8f03 commit f34f813
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions span_marker/spacy_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import torch
from datasets import Dataset
from spacy.tokens import Doc
-from spacy.util import minibatch
+from spacy.util import minibatch, filter_spans
import types

from span_marker.modeling import SpanMarkerModel
Expand Down Expand Up @@ -104,7 +104,7 @@ def __call__(self, doc: Doc) -> Doc:
span.label_ = entity["label"]
outputs.append(span)

-doc.set_ents(outputs)
+doc.set_ents(filter_spans(list(doc.ents) + outputs))
return doc

def pipe(self, stream, batch_size=128, include_sent=None):
Expand All @@ -131,5 +131,5 @@ def pipe(self, stream, batch_size=128, include_sent=None):
span = doc[start:end]
span.label_ = entity["label"]
outputs.append(span)
-doc.set_ents(outputs)
+doc.set_ents(filter_spans(list(doc.ents) + outputs))
yield doc

0 comments on commit f34f813

Please sign in to comment.