Skip to content

Commit

Permalink
Work on test for #615
Browse files Browse the repository at this point in the history
  • Loading branch information
honnibal committed Nov 23, 2016
1 parent 1b77932 commit e01c187
Showing 1 changed file with 36 additions and 0 deletions.
36 changes: 36 additions & 0 deletions spacy/tests/regression/test_issue615.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from __future__ import unicode_literals
import spacy
from spacy.attrs import ORTH


def merge_phrases(matcher, doc, i, matches):
    """Merge every matched phrase in ``doc`` into a single token.

    Has the signature of a matcher ``on_match`` callback. Merging changes the
    token indices, so nothing happens until the callback is invoked for the
    last match; at that point all matches are merged in one pass.

    Args:
        matcher: Unused; present only because of the callback signature.
        doc: Document the matches refer to. Its spans are merged in place.
        i: Index into ``matches`` of the match this call is for.
        matches: Sequence of ``(ent_id, label, start, end)`` tuples.

    Returns:
        None in every case.
    """
    if i != len(matches) - 1:
        return None
    # Slice out every span *before* merging anything, because merging shifts
    # the token indices that the start/end offsets in ``matches`` refer to.
    spans = [(label, doc[start:end]) for _ent_id, label, start, end in matches]
    for label, span in spans:
        # A labelled match is tagged 'NNP'; an unlabelled one keeps the tag
        # of the span's root token.
        tag = 'NNP' if label else span.root.tag_
        span.merge(tag, span.text, doc.vocab.strings[label])

def test_entity_ID_assignment():
    """Check that a matched, merged phrase ends up as a labelled entity.

    Registers a two-token "golf club" pattern under the 'Sport_Equipment'
    entity with ``merge_phrases`` as the ``on_match`` callback, runs the
    matcher over a short sentence, and asserts that the document then has at
    least one entity and that the first entity's label is non-zero.
    """
    nlp = spacy.en.English()
    text = u"""The golf club is broken"""
    doc = nlp(text)

    golf_pattern = [
        {ORTH: "golf"},
        {ORTH: "club"},
    ]

    matcher = spacy.matcher.Matcher(nlp.vocab)
    matcher.add_entity('Sport_Equipment', on_match=merge_phrases)
    matcher.add_pattern("Sport_Equipment", golf_pattern, label='Sport_Equipment')

    # The return value is not needed; the assertions below inspect the state
    # of ``doc`` after the matcher (and its callback) have run.
    matcher(doc)
    entities = list(doc.ents)

    assert entities != []  # assertion 1
    assert entities[0].label != 0  # assertion 2

0 comments on commit e01c187

Please sign in to comment.