bilstm.py
import tensorflow as tf
from util import blocks


class MyModel(object):
    def __init__(self, seq_length, emb_dim, hidden_dim, embeddings, emb_train):
        ## Define hyperparameters
        self.embedding_dim = emb_dim
        self.dim = hidden_dim
        self.sequence_length = seq_length

        ## Define the placeholders
        self.premise_x = tf.placeholder(tf.int32, [None, self.sequence_length])
        self.hypothesis_x = tf.placeholder(tf.int32, [None, self.sequence_length])
        self.y = tf.placeholder(tf.int32, [None])
        self.keep_rate_ph = tf.placeholder(tf.float32, [])

        ## Define parameters
        self.E = tf.Variable(embeddings, trainable=emb_train)

        self.W_mlp = tf.Variable(tf.random_normal([self.dim * 8, self.dim], stddev=0.1))
        self.b_mlp = tf.Variable(tf.random_normal([self.dim], stddev=0.1))

        self.W_cl = tf.Variable(tf.random_normal([self.dim, 3], stddev=0.1))
        self.b_cl = tf.Variable(tf.random_normal([3], stddev=0.1))
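        # The MLP input is 8 * dim wide: the classifier input built below
        # concatenates four vectors of size 2 * dim each (forward + backward
        # LSTM outputs), namely the two mean-pooled sentence vectors, their
        # difference, and their element-wise product.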
        ## Function for embedding lookup and dropout at embedding layer
        def emb_drop(x):
            emb = tf.nn.embedding_lookup(self.E, x)
            emb_drop = tf.nn.dropout(emb, self.keep_rate_ph)
            return emb_drop

        # Get lengths of unpadded sentences
        prem_seq_lengths, prem_mask = blocks.length(self.premise_x)
        hyp_seq_lengths, hyp_mask = blocks.length(self.hypothesis_x)
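        # (blocks.length is assumed to count the non-zero token ids in each
        # row, returning per-sentence lengths along with a padding mask.)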
        ### BiLSTM layer ###
        premise_in = emb_drop(self.premise_x)
        hypothesis_in = emb_drop(self.hypothesis_x)

        premise_outs, c1 = blocks.biLSTM(premise_in, dim=self.dim, seq_len=prem_seq_lengths, name='premise')
        hypothesis_outs, c2 = blocks.biLSTM(hypothesis_in, dim=self.dim, seq_len=hyp_seq_lengths, name='hypothesis')

        premise_bi = tf.concat(premise_outs, axis=2)
        hypothesis_bi = tf.concat(hypothesis_outs, axis=2)

        #premise_final = blocks.last_output(premise_bi, prem_seq_lengths)
        #hypothesis_final = blocks.last_output(hypothesis_bi, hyp_seq_lengths)

        ### Mean pooling
        premise_sum = tf.reduce_sum(premise_bi, 1)
        premise_ave = tf.div(premise_sum, tf.expand_dims(tf.cast(prem_seq_lengths, tf.float32), -1))

        hypothesis_sum = tf.reduce_sum(hypothesis_bi, 1)
        hypothesis_ave = tf.div(hypothesis_sum, tf.expand_dims(tf.cast(hyp_seq_lengths, tf.float32), -1))
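        # Summing over time and dividing by the true (unpadded) length gives a
        # per-sentence mean. This relies on the BiLSTM emitting zero vectors
        # past seq_len (the usual dynamic_rnn behaviour), so padded time steps
        # do not contribute to the sum.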
        ### Mou et al. concat layer ###
        diff = tf.subtract(premise_ave, hypothesis_ave)
        mul = tf.multiply(premise_ave, hypothesis_ave)
        h = tf.concat([premise_ave, hypothesis_ave, diff, mul], 1)

        # MLP layer
        h_mlp = tf.nn.relu(tf.matmul(h, self.W_mlp) + self.b_mlp)
        # Dropout applied to classifier
        h_drop = tf.nn.dropout(h_mlp, self.keep_rate_ph)
        # Get prediction
        self.logits = tf.matmul(h_drop, self.W_cl) + self.b_cl

        # Define the cost function
        self.total_cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.y, logits=self.logits))
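

# Minimal smoke-test sketch: builds the graph with a random embedding matrix
# and runs one forward pass. It assumes a TensorFlow 1.x runtime and that
# util.blocks from this repository is importable; the vocabulary size, batch
# size, and hyperparameters used here are arbitrary illustrative values.
if __name__ == "__main__":
    import numpy as np

    vocab_size, emb_dim, hidden_dim, seq_length = 100, 50, 64, 25
    init_embeddings = np.random.randn(vocab_size, emb_dim).astype(np.float32)

    model = MyModel(seq_length=seq_length, emb_dim=emb_dim, hidden_dim=hidden_dim,
                    embeddings=init_embeddings, emb_train=False)

    # Random token ids and labels standing in for a real padded batch.
    premises = np.random.randint(0, vocab_size, size=(4, seq_length)).astype(np.int32)
    hypotheses = np.random.randint(0, vocab_size, size=(4, seq_length)).astype(np.int32)
    labels = np.random.randint(0, 3, size=(4,)).astype(np.int32)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        cost = sess.run(model.total_cost, feed_dict={
            model.premise_x: premises,
            model.hypothesis_x: hypotheses,
            model.y: labels,
            model.keep_rate_ph: 1.0,  # keep everything, i.e. no dropout
        })
        print("cross-entropy on a random batch:", cost)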