-
Notifications
You must be signed in to change notification settings - Fork 2
/
bert_tweets_logistic_reg_model_building_evaluation.py
47 lines (33 loc) · 1.37 KB
/
bert_tweets_logistic_reg_model_building_evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
import logging
# Emit timestamped INFO-level log records, and let pandas print up to 200
# characters per column before truncating.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s : %(levelname)s : %(message)s',
)
pd.set_option('display.max_colwidth', 200)

# Read the training CSV first, then the test CSV, and report their shapes.
train, test = map(pd.read_csv, ("train_2kmZucJ.csv", "test_oJQbWVk.csv"))
print(train.shape, test.shape)
# Load the pickled training-set object (presumably precomputed BERT features,
# going by the file name -- confirm against the script that produced it).
# SECURITY: pickle.load can execute arbitrary code while deserializing; only
# unpickle files that come from a trusted source.
# The `with` block closes the file handle as soon as loading finishes.
with open("bert_train_03032019.pickle", "rb") as pickle_in:
    bert_train_new = pickle.load(pickle_in)

# Load the matching pickled test-set object the same way.
with open("bert_test_03032019.pickle", "rb") as pickle_in:
    bert_test_new = pickle.load(pickle_in)
# Hold out 20% of the training rows for validation; the fixed random_state
# makes the split repeatable from run to run.
xtrain, xvalid, ytrain, yvalid = train_test_split(
    bert_train_new,
    train['label'],
    test_size=0.2,
    random_state=42,
)
print(ytrain.shape, yvalid.shape)

# Fit a logistic regression with default settings, then report its F1 score
# on the held-out validation rows.
lreg = LogisticRegression()
lreg.fit(xtrain, ytrain)
preds_valid = lreg.predict(xvalid)
print(f1_score(y_true=yvalid, y_pred=preds_valid))
# Predict a label for every row of the test-set features with the fitted model.
preds_test = lreg.predict(bert_test_new)

# Pair each test id with its predicted label, then save the result as CSV
# without writing the DataFrame index as an extra column.
sub = pd.DataFrame(data={'id': test['id'], 'label': preds_test})
sub.to_csv(path_or_buf="sub_lreg.csv", index=False)