forked from snorkel-team/snorkel
-
Notifications
You must be signed in to change notification settings - Fork 0
/
util.py
48 lines (40 loc) · 1.97 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import pandas as pd
from snorkel.models import StableLabel
from snorkel.db_helpers import reload_annotator_labels
# Relative path of the tab-separated gold-label file that
# load_external_labels() reads.
FPATH = 'data/gold_labels.tsv'
def number_of_people(sentence):
    """Count the maximal runs of consecutive 'PERSON' tags in a sentence.

    Iterates ``sentence.ner_tags`` once. Adjacent 'PERSON' tags are treated
    as a single mention, so a run only contributes to the total at the
    position where it starts.

    Returns the number of such runs (0 when there are no 'PERSON' tags).
    """
    people = 0
    previous_was_person = False
    for ner_tag in sentence.ner_tags:
        is_person = ner_tag == 'PERSON'
        # A new mention begins only on a non-PERSON -> PERSON transition.
        if is_person and not previous_was_person:
            people += 1
        previous_was_person = is_person
    return people
def _add_stable_label_if_missing(session, context_stable_ids, annotator_name, value):
    """Add a StableLabel to the session unless a matching one is already stored.

    A label matches when both its ``context_stable_ids`` and its
    ``annotator_name`` are equal to the given ones.
    """
    existing = (
        session.query(StableLabel)
        .filter(StableLabel.context_stable_ids == context_stable_ids)
        .filter(StableLabel.annotator_name == annotator_name)
    )
    # Skip labels that already exist so re-running the loader adds no duplicates.
    if existing.count() == 0:
        session.add(StableLabel(
            context_stable_ids=context_stable_ids,
            annotator_name=annotator_name,
            value=value))


def load_external_labels(session, candidate_class, annotator_name='gold', path=None):
    """Load gold labels from a TSV file into the database as StableLabels.

    The file must be tab-separated and provide the columns ``person1``,
    ``person2`` and ``label``. For every row a StableLabel is stored for
    both orderings of the pair, keyed by the two ids joined with ``"~~"``.
    Labels that already exist for the same key and annotator are left
    untouched.

    After all rows are processed the session is committed and
    ``reload_annotator_labels`` is called for splits 1 and 2.

    Args:
        session: Database session used for the queries, inserts and commit.
        candidate_class: Passed through to ``reload_annotator_labels``.
        annotator_name: Annotator name recorded on every label.
        path: Optional path of the TSV file; when ``None`` the module-level
            ``FPATH`` is used.
    """
    gold_labels = pd.read_csv(FPATH if path is None else path, sep="\t")

    for _, row in gold_labels.iterrows():
        person1, person2 = row['person1'], row['person2']
        # Because it's a symmetric relation, load both directions.
        for ordered_pair in ((person1, person2), (person2, person1)):
            _add_stable_label_if_missing(
                session, "~~".join(ordered_pair), annotator_name, row['label'])

    # Commit session
    session.commit()

    # Reload annotator labels
    for split in (1, 2):
        reload_annotator_labels(
            session, candidate_class, annotator_name,
            split=split, filter_label_split=False)