drnn.py
'''
Created on July 14, 2020
@author: Shaoyu Dou
'''
import copy
import numpy as np
import tensorflow as tf
def dRNN(cell, inputs, rate, scope='default'):
    """
    This function constructs a layer of dilated RNN.
    Inputs:
        cell -- an RNN cell; the dilation operation is implemented independently
            of the cell, so in theory any valid TensorFlow RNN cell should work.
        inputs -- the input for the RNN, given as a list of 'n_steps' tensors,
            each of shape (batch_size, input_dims).
        rate -- the dilation rate, in the sense of the 'dilations' in the
            original WaveNet paper.
        scope -- variable scope.
    Outputs:
        outputs -- a list of 'n_steps' output tensors from the RNN.
        dilated_states -- the output at the last time step (for a GRU this
            equals the final hidden state), for use by a decoder.
    """
    n_steps = len(inputs)  # t
    # 'rate' is an int giving the dilation (number of skipped steps) of this layer.
    if rate <= 0 or rate >= n_steps:
        raise ValueError('The \'rate\' variable needs to be adjusted.')
    print("Building layer: %s, input length: %d, dilation rate: %d, input dim: %d." % (
        scope, n_steps, rate, inputs[0].get_shape()[1]))
    # Make the length of 'inputs' divisible by 'rate' using zero-padding.
    EVEN = (n_steps % rate) == 0
    if not EVEN:
        # Create an all-zero tensor of shape (batch_size, input_dims),
        # used for the zero-padding.
        zero_tensor = tf.zeros_like(inputs[0])  # shape [b, d]
        dilated_n_steps = n_steps // rate + 1
        print("=====> %d time points need to be padded. " % (
            dilated_n_steps * rate - n_steps))
        print("=====> Input length for sub-RNN: %d" % (dilated_n_steps))
        # Note: this appends to the caller's list in place; afterwards the
        # length of 'inputs' is divisible by 'rate'.
        for i_pad in range(dilated_n_steps * rate - n_steps):
            inputs.append(zero_tensor)
    else:
        dilated_n_steps = n_steps // rate
        print("=====> Input length for sub-RNN: %d" % (dilated_n_steps))
    # Now the length of 'inputs' is divisible by 'rate'.
    # Reshape it into a list of tensors of length 'dilated_n_steps', where each
    # tensor has shape [batch_size * rate, input_dims], by stacking the time
    # steps that are "colored" the same.
    # Example: n_steps is 5, rate is 2, inputs = [x0, x1, x2, x3, x4]
    #   zero-padding --> [x0, x1, x2, x3, x4, x5=0]  (each xi is [b, d])
    #   we want --> [[x0; x1], [x2; x3], [x4; x5=0]]
    # i.e. the new length, dilated_n_steps, is the ceiling of n_steps / rate
    # (here 3):
    #   i=0 -> [x0; x1]  shape [b*rate, d]
    #   i=1 -> [x2; x3]
    #   i=2 -> [x4; x5=0]
    # Previously one sample was fed per time step; now each dilated step feeds
    # the concatenation of the same batch at 'rate' consecutive time steps.
    # If instead rate = 3, dilated_n_steps is 2:
    #   i=0 -> [x0; x1; x2]
    #   i=1 -> [x3; x4; x5=0]
    #
    # inputs:         list of length t, each element [b, d]
    # dilated_inputs: list of length dilated_n_steps, each element [b*rate, d]
    dilated_inputs = [tf.concat(inputs[i * rate:(i + 1) * rate], axis=0)
                      for i in range(dilated_n_steps)]
    # A 'sequence_length' argument, if one were passed to the sub-RNN, would
    # indicate how many positions in each concatenated tensor are valid
    # (i.e. not zero-padding).
    # Build the dilated RNN with the reformatted (dilated) inputs.
    # For reference, the signature of tf.nn.static_rnn (TF 1.x):
    '''
    tf.nn.static_rnn(
        cell,
        inputs,
        initial_state=None,
        dtype=None,
        sequence_length=None,
        scope=None
    )
    Args:
        cell: An instance of RNNCell.
        inputs: A length T list of inputs, each a Tensor of shape
            [batch_size, input_size], or a nested tuple of such elements.
        initial_state: (optional) An initial state for the RNN. If
            cell.state_size is an integer, this must be a Tensor of appropriate
            type and shape [batch_size, cell.state_size]. If cell.state_size is
            a tuple, this should be a tuple of tensors having shapes
            [batch_size, s] for s in cell.state_size.
        dtype: (optional) The data type for the initial state and expected
            output. Required if initial_state is not provided or the RNN state
            has a heterogeneous dtype.
        sequence_length: Specifies the length of each sequence in inputs. An
            int32 or int64 vector (tensor) sized [batch_size], values in [0, T).
        scope: VariableScope for the created subgraph; defaults to "rnn".
    Returns:
        A pair (outputs, state) where:
        outputs is a length T list of outputs (one for each input), or a
            nested tuple of such elements.
        state is the final state.
    '''
    # dilated_inputs: list of length dilated_n_steps, each element [b*rate, d].
    # A sequence of n_steps has effectively become one of dilated_n_steps: the
    # time axis is shortened, and to avoid losing information each dilated step
    # carries batch_size * rate samples instead of batch_size.
    # A 'sequence_length' for this call would depend on two factors: the true
    # lengths (e.g. fed via a placeholder) and the zero-padding added above;
    # properly it should be driven by the former.
    # The discarded final state has shape [b*rate, hidden_units]: the cell's
    # state after processing this batch in parallel.
    dilated_outputs, _ = tf.nn.static_rnn(
        cell,
        dilated_inputs,
        dtype=tf.float32,
        scope=scope)  # dilated_outputs: list of dilated_n_steps tensors, each [b*rate, hidden_units]
    # Reshape the outputs back to the input format: a list of tensors of shape
    # [batch_size, hidden_units]. Split each element of 'dilated_outputs' from
    # [batch_size * rate, hidden_units] into a list of 'rate' tensors, each
    # [batch_size, hidden_units]. (The last dimension is hidden_units, not
    # input_dims: these are RNN outputs.)
    splitted_outputs = [tf.split(output, rate, axis=0)
                        for output in dilated_outputs]
    # Flatten into a list of length dilated_n_steps * rate, i.e. the padded
    # n_steps (a multiple of rate); each element is [b, hidden_units].
    unrolled_outputs = [output
                        for sublist in splitted_outputs for output in sublist]
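    # Worked example of the ordering (illustrative, not from the original
    # comments): with rate = 2 and dilated_outputs = [[h0; h1], [h2; h3],
    # [h4; h5]] (each [b*2, hidden_units]), tf.split followed by the flatten
    # above yields [h0, h1, h2, h3, h4, h5], so unrolled_outputs[t] lines up
    # with the original inputs[t].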
    # Remove the padded zeros: keep the first n_steps elements (the unpadded
    # count); each element is [b, hidden_units].
    outputs = unrolled_outputs[:n_steps]
    # Also return a state item for the decoder. For a GRU the state equals the
    # output, so the output at the last time step suffices.
    dilated_states = outputs[-1]
    return outputs, dilated_states
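# Usage sketch for a single dilated layer (a minimal example, assuming a
# TensorFlow 1.x environment and a GRU cell; the names and sizes here are
# illustrative, not from the original repository):
#     cell = tf.nn.rnn_cell.GRUCell(num_units=16)
#     xs = [tf.placeholder(tf.float32, [None, 8]) for _ in range(10)]  # t=10, d=8
#     outputs, last_state = dRNN(cell, xs, rate=2, scope='layer_0')
#     # outputs: list of 10 tensors, each [batch, 16]; last_state: [batch, 16]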
def multi_dRNN_with_dilations(cells, inputs, dilations, scope='drnn'):
    """
    This function constructs a multi-layer dilated RNN.
    Inputs:
        cells -- A list of RNN cells, one per layer.
        inputs -- A tensor of shape (batch_size, n_steps, input_dims).
        dilations -- A list of integers of the same length as 'cells',
            giving the dilation rate of each layer.
        scope -- name prefix for the per-layer variable scopes.
    Outputs:
        output -- A tensor of shape (batch_size, n_steps, hidden_units):
            the outputs of the top layer of the multi-dRNN.
        final_state_list -- A list holding the final state of each layer
            (e.g. for use as initial states in a decoder).
    """
    drnn_inputs = tf.transpose(inputs, [1, 0, 2])  # [t, b, d]
    drnn_inputs = tf.unstack(drnn_inputs, axis=0)  # list of t tensors, each [b, d]
    assert len(cells) == len(dilations)
    output = copy.copy(drnn_inputs)
    final_state_list = []
    for cell, dilation in zip(cells, dilations):
        scope_name = "multi_dRNN_dilation_%s_%d" % (scope, dilation)
        output, state = dRNN(cell, output, dilation, scope=scope_name)
        final_state_list.append(state)
    output = tf.stack(output, axis=0)  # [t, b, d]
    output = tf.transpose(output, [1, 0, 2])  # [b, t, d]
    return output, final_state_list
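# Minimal smoke test: a sketch, not part of the original module. It assumes a
# TensorFlow 1.x environment (tf.placeholder, tf.Session, tf.nn.rnn_cell) and
# uses illustrative sizes; the GRU cells and dilations below are arbitrary.
if __name__ == '__main__':
    n_steps, batch_size, input_dims, hidden_units = 8, 4, 3, 16
    dilations = [1, 2, 4]
    cells = [tf.nn.rnn_cell.GRUCell(hidden_units) for _ in dilations]
    x = tf.placeholder(tf.float32, [None, n_steps, input_dims])  # [b, t, d]
    outputs, final_states = multi_dRNN_with_dilations(cells, x, dilations)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        batch = np.random.rand(batch_size, n_steps, input_dims).astype(np.float32)
        out, states = sess.run([outputs, final_states], feed_dict={x: batch})
        print('top-layer output shape:', out.shape)               # (4, 8, 16)
        print('per-layer final states:', [s.shape for s in states])  # 3 x (4, 16)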