# spacy_10_debuggingPatterns.py
# -*- coding: utf-8 -*-
"""
Created on Tue May 28 2019
@author: Stacy Bridges
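NOTE:
When using the token-based Matcher, pay close attention to tokenization:
a pattern only matches if its token descriptions line up with how the
text was actually split (the "ad-free" pattern in main() spells the
phrase out as three tokens: "ad", "-", "free").
When you only need to match exact strings, the PhraseMatcher is often
the simpler choice.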
"""
import spacy
from spacy.matcher import Matcher
def main():
    """Run two token-based Matcher patterns over a sample text and print hits.

    Loads the ``en_core_web_sm`` pipeline, processes a fixed news snippet,
    registers the patterns ``PATTERN1`` and ``PATTERN2`` on a ``Matcher``,
    and prints one line per match: the pattern name followed by the text of
    the matched span.
    """
    pipeline = spacy.load("en_core_web_sm")

    sample_text = (
        "Twitch Prime, the perks program for Amazon Prime members offering free "
        "loot, games and other benefits, is ditching one of its best features: "
        "ad-free viewing. According to an email sent out to Amazon Prime members "
        "today, ad-free viewing will no longer be included as a part of Twitch "
        "Prime for new members, beginning on September 14. However, members with "
        "existing annual subscriptions will be able to continue to enjoy ad-free "
        "viewing until their subscription comes up for renewal. Those with "
        "monthly subscriptions will have access to ad-free viewing until October 15."
    )
    parsed = pipeline(sample_text)

    # Pattern name -> list of per-token attribute dicts.
    # PATTERN1: "amazon" (case-insensitive) followed by a title-cased proper noun.
    # PATTERN2: "ad", "-", "free" as three separate tokens, followed by a noun.
    token_patterns = {
        "PATTERN1": [{"LOWER": "amazon"}, {"IS_TITLE": True, "POS": "PROPN"}],
        "PATTERN2": [
            {"LOWER": "ad"},
            {"TEXT": "-"},
            {"LOWER": "free"},
            {"POS": "NOUN"},
        ],
    }

    token_matcher = Matcher(pipeline.vocab)
    for label, token_specs in token_patterns.items():
        # The second positional argument is left as None.
        # NOTE(review): this looks like the spaCy v2 signature
        # add(key, on_match, *patterns); spaCy v3 expects add(key, [pattern]).
        # Confirm against the installed spaCy version.
        token_matcher.add(label, None, token_specs)

    # Each match is (match_id, start, end); the id is resolved back to the
    # pattern name through the vocab's string store.
    for key, first, last in token_matcher(parsed):
        span = parsed[first:last]
        print(parsed.vocab.strings[key], span.text)

    # Debugging aid (disabled): print every token with its part-of-speech tag
    # to check how the text was tokenized before adjusting a pattern.
    # for token in parsed:
    #     print(token.text, token.pos_)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()