# flipFlop.py
# Detects "flip-flops": sentences that are semantically similar (by word2vec
# cosine similarity) but whose VADER sentiment flips sign.

import numpy as np
import scipy.spatial
from gensim.models.keyedvectors import KeyedVectors
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Pretrained Google News vectors (~3.6 GB); must be downloaded separately.
pathToBinVectors = './GoogleNews-vectors-negative300.bin'
print("Loading the data file... Please wait...")
model1 = KeyedVectors.load_word2vec_format(pathToBinVectors, binary=True)
print("Successfully loaded 3.6 G bin file!")

SIMILARITY = 0.8  # cosine-similarity threshold for calling two sentences "similar"
meme = SentimentIntensityAnalyzer()  # requires nltk.download('vader_lexicon')
num_features = 300  # dimensionality of the GoogleNews vectors
# NOTE: gensim < 4.0 exposes the vocabulary as index2word; on gensim >= 4.0
# use model1.index_to_key instead.
index2word_set = set(model1.index2word)
cache = {}  # word -> vector, memoizes lookups across sentences
def sentence_sentiment(sentence):
    """Return VADER's compound sentiment score for `sentence`, in [-1, 1]."""
    ss = meme.polarity_scores(sentence)
    return ss['compound']
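# Only the sign of the compound score matters downstream: the product test in
# compare_similar_sentences treats > 0 as positive, < 0 as negative, and a
# score of exactly 0 as a flip.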
def compare_similar_sentences(subsequence):
    """Split subsequence[1:] into [flip-flops, consistent] relative to the
    sentiment sign of the anchor sentence subsequence[0]."""
    retArray = [[], []]
    # Anchor sentiment; every later sentence is compared against this sign,
    # so prev is deliberately never updated inside the loop.
    prev = float(sentence_sentiment(subsequence[0]))
    for i in range(1, len(subsequence)):
        temp = float(sentence_sentiment(subsequence[i]))
        if temp * prev <= 0:
            # Opposite sentiment sign: flip-flopped.
            retArray[0].append(subsequence[i])
        else:
            # Same sentiment sign: consistent.
            retArray[1].append(subsequence[i])
    return retArray
def avg_feature_vector(words, model, num_features):
    """Average the word2vec vectors of the in-vocabulary words in `words`."""
    featureVec = np.zeros((num_features,), dtype="float32")
    nwords = 0
    for word in words:
        if word in cache:
            nwords += 1
            featureVec = np.add(featureVec, cache[word])
        elif word in index2word_set:
            cache[word] = model[word]
            featureVec = np.add(featureVec, model[word])
            nwords += 1
    if nwords > 0:
        featureVec = np.divide(featureVec, nwords)
    return featureVec
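# Design note: the cache mainly saves repeated vocabulary membership checks
# and dict lookups; gensim's KeyedVectors indexing is already in-memory, so
# the speedup is modest unless the same words recur very often.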
def mostSimilarTo(sentence, sentenceArray):
    """Return the sentences in sentenceArray (excluding `sentence` itself)
    whose averaged word vectors exceed the SIMILARITY threshold."""
    similarSentences = []
    sentence_1_avg_vector = avg_feature_vector(sentence.split(), model1, num_features)
    for potentialSentence in sentenceArray:
        if potentialSentence != sentence:
            sentence_2_avg_vector = avg_feature_vector(potentialSentence.split(), model1, num_features)
            # Cosine similarity = 1 - cosine distance.
            sen1_sen2_similarity = 1 - scipy.spatial.distance.cosine(sentence_1_avg_vector, sentence_2_avg_vector)
            if sen1_sen2_similarity > SIMILARITY:
                similarSentences.append(potentialSentence)
    return similarSentences
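# Caveat: if every word in a sentence is out of vocabulary, its averaged
# vector is all zeros and scipy's cosine distance may produce nan (with a
# runtime warning); since nan comparisons are False, such sentences are
# silently skipped rather than reported.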
def flipFlopped(sentenceArray):
    """For each sentence with at least one similar sentence, report which of
    the similar sentences flip sentiment and which stay consistent."""
    retArr = []
    for sentence in sentenceArray:
        retSentenceArray = mostSimilarTo(sentence, sentenceArray)
        if len(retSentenceArray) != 0:
            # Put the anchor sentence first so its sentiment sets the baseline.
            retSentenceArray.insert(0, sentence)
            retStatus = compare_similar_sentences(retSentenceArray)
            retStatus.insert(0, sentence)
            retArr.append(retStatus)
    return retArr
# Return shape:
# [
#   [sentence, flipFlopList, consistentList],
#   [sentence, flipFlopList, consistentList],
#   ...
# ]
# print ( flipFlopped(['The sky is blue today', 'the sky is green today']) )
# print(flipFlopped(['I hate war war is bad war is not good',
# 'non no no yes hello hi there', 'These are not different and cool', 'these are different and cool',
# 'I love war war is okay war is fun',
# 'War sounds good is good great war']))
# flipFlopped( ['Her late, great husband, Antonin Scalia, will forever be a symbol of American justice','As promised, I directed the Department of Defense to develop a plan to demolish and destroy ISIS -- a network of lawless savages that have slaughtered Muslims and Christians, and men, and women, and children of all faiths and all beliefs','Finally, I have kept my promise to appoint a justice to the United States Supreme Court, from my list of 20 judges, who will defend our Constitution'])
'''
flipFlopped(['I hate war war is bad war is not good',
'non no no yes hello hi there', 'These are not different and cool', 'these are different and cool',
'I love war war is okay war is fun',
'War sounds good is good great war'])
'''
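
# Minimal demo sketch; it assumes the GoogleNews bin file is present at
# pathToBinVectors and that the NLTK vader_lexicon has been downloaded
# (python -m nltk.downloader vader_lexicon).
if __name__ == "__main__":
    results = flipFlopped(['The sky is blue today', 'the sky is green today'])
    for anchor, flipFlops, consistent in results:
        print("Anchor:     ", anchor)
        print("Flip-flops: ", flipFlops)
        print("Consistent: ", consistent)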