From 42414034b493223dc5fe58a6bf48b7f447dafb40 Mon Sep 17 00:00:00 2001 From: Hao <130360943+Hao-Z-hang@users.noreply.github.com> Date: Wed, 25 Dec 2024 11:43:49 +0000 Subject: [PATCH] Update create_data.py emm --- emm/data/create_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emm/data/create_data.py b/emm/data/create_data.py index 2b9959a..481db78 100644 --- a/emm/data/create_data.py +++ b/emm/data/create_data.py @@ -511,5 +511,5 @@ def create_training_data() -> tuple[pd.DataFrame, Vocabulary]: lambda r: None if r["no_candidate"] else r["gt_uid"], axis=1 ) # By convention, gt_uid is null in the no_candidate case - vocabulary = Vocabulary(very_common_words={"bv", "nv"}, common_words={"bank", "holding"}) + vocabulary = Vocabulary(very_common_words={"bv", "nv", "ltd", "plc", "limited", "co", "llp"}, common_words={"bank", "holding", "holdings"}) return df, vocabulary