-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLexicon.py
79 lines (64 loc) · 2.3 KB
/
Lexicon.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import pandas as pd
# Lexicon class
class Lexicon:
"""
A class used to represent a Lexicon
...
Attributes
----------
name : str
the emotion that this lexicon evaluates/scores
embed : dict()
A dict containing the words with respective lexicon scores
"""
def __init__(self, emotion, wordlex):
self.emo = emotion
self.wordlex = wordlex
def clean_lexicon(word):
"""Cleaning the lexicons
Parameters:
word (str): The word to be cleaned
Returns:
word (str): The cleaned word
"""
if word[0] == "#" or word[0] == "@":
word = word[1:]
return word
def datareader(path_to_file, Elex=True):
"""Transform the raw input of a lexicon to a pandas DataFrame
Parameters:
path_to_file (str): The path to the lexicon file
Elex (binary): True if it is an emotion lexicon, False if it is an sentiment lexicon
Returns:
lexicon (pandas DataFrame): The resulting DataFrame
"""
if Elex:
lexicon = pd.read_csv(path_to_file, sep="\t", index_col=False, names=["emotion", "word", "score"])
return lexicon
else:
lexicon = pd.read_csv(path_to_file, sep="\t", index_col=False, names=["word", "score", "pos", "neg"])
return lexicon
def load_lexicon(df, Elex=True):
"""Transforms the pandas DataFrame to the a list of Lexicons
Parameters:
df (pandas DataFrame): The df from datareader
Elex (binary): True if it is an emotion lexicon, False if it is an sentiment lexicon
Returns:
lexicons (list: Lexicons): A list of the lexicons created
"""
lexicons = []
for i in df.index:
df.at[i, "word"] = clean_lexicon(str(df.at[i, "word"]))
if Elex:
for emo in df["emotion"].unique():
lexd = dict()
elex = df[df["emotion"] == emo]
for idx, row in elex.iterrows():
lexd[row["word"]] = row["score"]
lexicons.append(Lexicon(emo, lexd))
else:
lexd = dict()
for idx, row in df.iterrows():
lexd[row["word"]] = row["score"]
lexicons.append(Lexicon(emotion="sentiment", wordlex=lexd))
return lexicons