-
Notifications
You must be signed in to change notification settings - Fork 3
/
inputDevice.py
127 lines (102 loc) · 4.63 KB
/
inputDevice.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import re
import cPickle as pickle
import math
# Words considered non-expressive; clean() strips them from the sentence.
# ('!' is listed on purpose: stand-alone exclamation marks are dropped too.)
_STOP_WORDS = (
    'bij', '!', 'of', 'voor', 'in', 'een', 'he', 'op', 'wie', 'uit', 'eo',
    'en', 'de', 'het', 'ik', 'jij', 'zij', 'wij', 'deze', 'dit', 'die',
    'dat', 'is', 'je', 'na', 'zijn', 'tot', 'te', 'sl', 'hierin', 'naar',
    'onder',
)

# Emoticons and the placeholder word each one is replaced with (order matters:
# ':-)' must be handled before the shorter ':)').
_EMOTICONS = (
    (':-)', " blijesmiley "),
    (':)', " blijesmiley "),
    (':(', " zieligesmiley "),
    (':s', ' awkwardsmiley '),
)


def clean( sentence ):
    """Normalize a raw tweet-like sentence before tokenizing.

    Steps, in order: lowercase, replace emoticons by placeholder words,
    pad '!' and '?' with spaces, remove retweet markers / @mentions /
    links, remove punctuation, collapse runs of spaces and remove the
    stop words listed in _STOP_WORDS.

    Returns the cleaned string; it may start or end with a single space.
    """
    sentence = sentence.lower()
    for emoticon, placeholder in _EMOTICONS:
        sentence = sentence.replace( emoticon, placeholder )
    sentence = sentence.replace( '!', " ! " )
    sentence = sentence.replace( '?', " ? " )

    # Delete useless info, such as links and twitter account names.
    # '\b' keeps the retweet marker from matching inside words such as
    # "start " (which the unanchored 'rt ' used to mangle).
    # NOTE(review): 'http.*' removes everything from a link up to the end of
    # the line, not only the link itself - confirm this is intended.
    sentence = re.sub( r'\brt |@\w+|http.*', '', sentence )

    # Remove punctuation. The previous pattern contained an unescaped quote
    # that terminated the string literal early (a syntax error); a character
    # class avoids the escaping problem altogether.
    sentence = re.sub( r"[.,/\\\[\]'|#:;()*]", '', sentence )
    sentence = re.sub( ' +', ' ', sentence )

    # Delete non-expressive words.
    for word in _STOP_WORDS:
        escaped = re.escape( word )
        # The lookahead leaves the trailing space in place, so a stop word
        # that is immediately repeated ("de de") is removed every time.
        sentence = re.sub( ' ' + escaped + '(?= )', '', sentence )
        sentence = re.sub( r'\A' + escaped + ' ', ' ', sentence )
        sentence = re.sub( ' ' + escaped + r'\Z', ' ', sentence )

    # TODO: stemming (__stemmer.stem) is disabled because the cleanup is not
    # good enough yet.
    return sentence
def tokenize( sentence ):
    """Split a sentence into tokens.

    Returns a list containing, in order of appearance, every run of word
    characters and every single '?' or '!' found in the sentence; all other
    characters act as separators.
    """
    # Raw string: '\w', '\?' and '\!' are not valid string escapes.
    return re.findall( r'\w+|\?|\!', sentence )
def _sigmoid( value ):
    """Logistic function 1 / (1 + e^-value), safe for very negative input."""
    try:
        return 1 / (1 + math.exp( -value ))
    except OverflowError:
        # e^-value is too large for a float; the quotient is effectively 0.
        return 0.0


def classifyNewLine(sentence, filename = './weightsDayTraining.txt', num_hidden = 5):
    """Classify a sentence with the stored one-hidden-layer network.

    The sentence is cleaned and tokenized, turned into a bag-of-words input
    (1 for every feature word that occurs in it, 0 otherwise) and propagated
    forward through the network whose weights are unpickled from `filename`.

    The weights object is indexed as follows:
      * weights[word][j]           - weight from input `word` to hidden unit j
      * weights['bias'][j]         - input-layer bias weight to hidden unit j
      * weights['hidden' + j][0]   - weight from hidden unit j to the output
      * weights['hidden' + num_hidden][0] - hidden-layer bias weight
    Keys starting with 'hidden' or 'bias' are never treated as feature words.

    Parameters:
      sentence   - raw text to classify.
      filename   - path of the pickled weights.
      num_hidden - number of hidden units the stored network has.

    Returns the sigmoid activation of the single output node.

    Raises IOError/OSError when the file cannot be opened and KeyError (or
    IndexError) when the weights lack an expected entry.
    """
    # A set makes the per-feature membership test O(1).
    tokens = set( tokenize( clean( sentence ) ) )
    bias_input = -1  # both bias units feed a constant -1 into the next layer

    # SECURITY: unpickling executes arbitrary code embedded in the file; only
    # load weight files from a trusted source.
    # The file mode is kept as 'r' to read the existing weight files exactly
    # as before; the file is now closed again once it has been loaded.
    with open( filename, 'r' ) as weight_file:
        weights = pickle.load( weight_file )

    # Every key that is not a hidden-unit or bias entry is an input feature.
    feature_words = [word for word in weights
                     if not re.match( 'hidden.*|bias', word )]

    # Input layer -> hidden layer. Absent words have input value 0 and thus
    # contribute nothing; present words contribute their weight (input 1).
    hidden_values = []
    for j in range( num_hidden ):
        activation = 0
        for word in feature_words:
            if word in tokens:
                activation += weights[word][j]
        activation += bias_input * weights['bias'][j]
        hidden_values.append( _sigmoid( activation ) )

    # Hidden layer -> single output node.
    activation = 0
    for j in range( num_hidden ):
        activation += hidden_values[j] * weights['hidden' + str( j )][0]
    activation += bias_input * weights['hidden' + str( num_hidden )][0]
    return _sigmoid( activation )