Attention_BiLSTM_Model.py
'''
Created on 27 Sep 2017
'''
import tensorflow as tf

from attention import attention_mechanism
from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn
from tensorflow.contrib.rnn import GRUCell  # BasicLSTMCell is a drop-in alternative
class attentionBiLSTM(object):
    """
    Attention BiLSTM model: embedding lookup -> bidirectional GRU ->
    attention pooling -> dropout -> linear softmax classifier.
    """
    def __init__(self, input_embedding_size, sequence_length, hidden_size,
                 output_size, vocab_size, learning_rate, ATTENTION_SIZE):
        # Placeholders for input token ids, one-hot labels and dropout keep probability.
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="train_input")
        self.input_y = tf.placeholder(tf.float32, [None, output_size], name="train_output")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        ################
        # Embedding layer
        ################
        with tf.device('/cpu:0'), tf.name_scope("Embedding"):
            # Trainable embedding matrix, initialised near zero; pre-trained
            # vectors can be loaded into it via embedding_init.
            self.W = tf.Variable(
                tf.random_uniform([vocab_size, input_embedding_size], -0.0001, 0.0001),
                trainable=True, name="W", dtype=tf.float32)
            self.embedding_placeholder = tf.placeholder(
                tf.float32, [vocab_size, input_embedding_size], name="embedding_placeholder")
            self.embedding_init = self.W.assign(self.embedding_placeholder)
            self.train_input_embedding = tf.nn.embedding_lookup(self.W, self.input_x)
        ###########
        # Attention BiLSTM model
        ###########
        with tf.name_scope("Model"):
            # Output projection: the BiRNN concatenates forward and backward
            # states, hence 2 * hidden_size input features.
            weights = tf.Variable(tf.random_normal([2 * hidden_size, output_size]))
            biases = tf.Variable(tf.random_normal([output_size]))
            # Bidirectional recurrent layer (GRUCell; BasicLSTMCell works as well).
            lstm_output, _ = bidirectional_dynamic_rnn(GRUCell(hidden_size),
                                                       GRUCell(hidden_size),
                                                       inputs=self.train_input_embedding,
                                                       dtype=tf.float32)
            # Attention layer: pools the per-timestep outputs into a single
            # vector and returns the attention weights (alphas) for inspection.
            outputs, self.alphas = attention_mechanism.attention(
                lstm_output, ATTENTION_SIZE, return_alphas=True)
            # Add dropout on the attended representation.
            outputs = tf.nn.dropout(outputs, self.dropout_keep_prob)
            # Linear layer producing unnormalised class scores.
            self.scores = tf.matmul(outputs, weights) + biases
            self.pred_ops = tf.nn.softmax(self.scores)
            self.predictions = tf.argmax(self.scores, 1, name="predictions")
        # Calculate mean cross-entropy loss. The train op (e.g. an optimizer
        # using learning_rate) is built outside this class.
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses)
        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
            self.y = tf.argmax(self.input_y, 1)
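

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original file). The hyperparameter
# values and the external AdamOptimizer step are illustrative assumptions,
# consistent with the optimizer hint in the loss section above.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import numpy as np

    vocab_size, embed_dim, seq_len, n_classes = 1000, 50, 20, 5
    model = attentionBiLSTM(input_embedding_size=embed_dim,
                            sequence_length=seq_len,
                            hidden_size=64,
                            output_size=n_classes,
                            vocab_size=vocab_size,
                            learning_rate=1e-3,
                            ATTENTION_SIZE=32)
    # The train op lives outside the model class in this codebase (assumed Adam).
    train_op = tf.train.AdamOptimizer(1e-3).minimize(model.loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Load (here: random stand-in) pre-trained embeddings into W.
        sess.run(model.embedding_init, feed_dict={
            model.embedding_placeholder:
                np.random.uniform(-0.0001, 0.0001, (vocab_size, embed_dim))})
        # One dummy training step on random data.
        x = np.random.randint(0, vocab_size, (8, seq_len))
        y = np.eye(n_classes)[np.random.randint(0, n_classes, 8)]
        _, loss, acc = sess.run([train_op, model.loss, model.accuracy],
                                feed_dict={model.input_x: x,
                                           model.input_y: y,
                                           model.dropout_keep_prob: 0.5})
        print('loss=%.4f acc=%.4f' % (loss, acc))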