diff --git a/.gitignore b/.gitignore index be50232..a9ba243 100644 --- a/.gitignore +++ b/.gitignore @@ -6,5 +6,4 @@ Dandelion.egg-info dandelion/__pycache__ dandelion/model/__pycache__ Cython/*.whl -site -Test \ No newline at end of file +site \ No newline at end of file diff --git a/Test/Test_BatchNorm.py b/Test/Test_BatchNorm.py new file mode 100644 index 0000000..0cfa8c9 --- /dev/null +++ b/Test/Test_BatchNorm.py @@ -0,0 +1,92 @@ +# coding:utf-8 +# Test for BatchNorm class +# Created : 2, 27, 2018 +# Revised : 2, 27, 2018 +# All rights reserved +#------------------------------------------------------------------------------------------------ +__author__ = 'dawei.leng' +import os, sys, psutil +os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'" + +import theano +from theano import tensor +from dandelion.module import * +from dandelion.activation import * +from lasagne.layers import InputLayer, BatchNormLayer_DV, get_output, get_all_updates +import lasagne.nonlinearities as LACT +import dandelion +dandelion_path = os.path.split(dandelion.__file__)[0] +print('dandelion path = %s\n' % dandelion_path) + +class build_model_D(Module): + def __init__(self, input_shape=None, axes='auto'): + super().__init__() + self.input_shape = input_shape + self.axes = axes + self.bn = BatchNorm(input_shape=self.input_shape, axes=self.axes) + + def forward(self, x): + x = self.bn.forward(x) + return x + + def predict(self, x): + return self.bn.predict(x) + +def build_model_L(input_shape=None, axes='auto'): + input_var = tensor.ftensor4('x') + input0 = InputLayer(shape=input_shape, input_var=input_var, name='input0') + result = BatchNormLayer_DV(input0, axes=axes, name='bn0') + return result + +def fix_update_bcasts(updates): + for param, update in updates.items(): + if param.broadcastable != update.broadcastable: + updates[param] = tensor.patternbroadcast(update, param.broadcastable) + return updates + + +if __name__ == '__main__': + import numpy as np + from lasagne_ext.utils import get_layer_by_name + + B, C, H, W = 2, 1, 8, 8 + input_shape = (None, C, H, W) + axes = 'auto' + + model_D = build_model_D(input_shape=input_shape, axes=axes) + model_L = build_model_L(input_shape=input_shape, axes=axes) + + X = get_layer_by_name(model_L, 'input0').input_var + #--- predict ---# + if 0: + y_D = model_D.predict(X) + y_L = get_output(model_L, deterministic=True) + fn_L = theano.function([X], y_L, no_default_updates=True) + fn_D = theano.function([X], y_D, no_default_updates=True) + + #--- train ---# + if 1: + y_D = model_D.forward(X) + y_L = get_output(model_L, deterministic=False) + + update_L = fix_update_bcasts(get_all_updates(model_L)) + update_D = fix_update_bcasts(model_D.collect_self_updates()) + + fn_L = theano.function([X], y_L, updates=update_L, no_default_updates=True) + fn_D = theano.function([X], y_D, updates=update_D, no_default_updates=False) + # fn_L = theano.function([X], y_L, no_default_updates=True) + # fn_D = theano.function([X], y_D, no_default_updates=True) + + + for i in range(20): + x = np.random.rand(B, C, H, W).astype(np.float32) + y_D = fn_D(x) + y_L = fn_L(x) + diff = np.sum(np.abs(y_D - y_L)) + print('i=%d, diff=%0.6f' % (i, diff)) + if diff>1e-4: + print(y_D) + print(y_L) + raise ValueError('diff is too big') + + print('Test passed') \ No newline at end of file diff --git a/Test/Test_ChainCRF.py b/Test/Test_ChainCRF.py new file mode 100644 index 0000000..3fcf2f6 --- /dev/null +++ b/Test/Test_ChainCRF.py @@ -0,0 +1,484 @@ +# coding:utf-8 +# Test ChainCRF() 
class against anago's reference implementation
+# Created : 2, 12, 2018
+# Revised : 2, 12, 2018
+# All rights reserved
+#------------------------------------------------------------------------------------------------
+__author__ = 'dawei.leng'
+import os, sys, psutil
+os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'"
+
+from keras import backend as K
+from keras import initializers, regularizers, constraints
+from keras.engine import Layer, InputSpec
+import theano, numpy as np
+import theano.tensor as tensor
+
+def log_sum_exp(x, axis=None, keepdims=False):
+    """
+    Stable log of a sum of exponentials
+    """
+    x_max = tensor.max(x, axis=axis, keepdims=True)
+    z = tensor.log(tensor.sum(tensor.exp(x - x_max), axis=axis, keepdims=True)) + x_max
+    return z.sum(axis=axis, keepdims=keepdims)
+
+def path_energy(y, x, U, b_start=None, b_end=None, mask=None):
+    """Calculates the energy of a tag path y for a given input x (with mask),
+    transition energies U and boundary energies b_start, b_end."""
+    x = add_boundary_energy(x, b_start, b_end, mask)
+    return path_energy0(y, x, U, mask)
+
+
+def path_energy0(y, x, U, mask=None):
+    """Path energy without boundary potential handling."""
+    n_classes = K.shape(x)[2]            # x.shape = (B, T, N)
+    y_one_hot = K.one_hot(y, n_classes)  # convert integer 'y' to one-hot encoded 'y': (B, T) -> (B, T, N)
+
+    # Tag path energy
+    energy = K.sum(x * y_one_hot, 2)     # (B, T, N) -> (B, T)
+    energy = K.sum(energy, 1)            # (B, T) -> (B,)
+
+    # Transition energy
+    y_t = y[:, :-1]                      # y_t,     (B, T-1)
+    y_tp1 = y[:, 1:]                     # y_(t+1), (B, T-1)
+    U_flat = K.reshape(U, [-1])          # (N, N) -> (N*N,)
+    # Convert 2-dim indices (y_t, y_tp1) of U to 1-dim indices of U_flat:
+    flat_indices = y_t * n_classes + y_tp1
+    U_y_t_tp1 = K.gather(U_flat, flat_indices)
+
+    if mask is not None:
+        mask = K.cast(mask, K.floatx())
+        y_t_mask = mask[:, :-1]
+        y_tp1_mask = mask[:, 1:]
+        U_y_t_tp1 *= y_t_mask * y_tp1_mask
+
+    energy += K.sum(U_y_t_tp1, axis=1)
+
+    return energy                        # (B,)
+
+
+def sparse_chain_crf_loss(y, x, U, b_start=None, b_end=None, mask=None):
+    """Given the true sparsely encoded tag sequence y, input x (with mask),
+    transition energies U, boundary energies b_start and b_end, it computes
+    the loss function of a Linear Chain Conditional Random Field:
+    loss(y, x) = NLL(P(y|x)), where P(y|x) = exp(E(y, x)) / Z.
+    So, loss(y, x) = - E(y, x) + log(Z).
+    Here, E(y, x) is the tag path energy, and Z is the normalization constant.
+    The value log(Z) is also called the free energy.
+    """
+    x = add_boundary_energy(x, b_start, b_end, mask)
+    energy = path_energy0(y, x, U, mask)
+    energy -= free_energy0(x, U, mask)
+    return K.expand_dims(-energy, -1)
+
+
+def chain_crf_loss(y, x, U, b_start=None, b_end=None, mask=None):
+    """Variant of sparse_chain_crf_loss but with one-hot encoded tags y."""
+    y_sparse = K.argmax(y, -1)
+    y_sparse = K.cast(y_sparse, 'int32')
+    return sparse_chain_crf_loss(y_sparse, x, U, b_start, b_end, mask)
+
+
+def add_boundary_energy(x, b_start=None, b_end=None, mask=None):
+    """Given the observations x, adds the start boundary energy b_start (resp.
+    the end boundary energy b_end) to the start (resp. end) elements, and
+    multiplies by the mask."""
+    if mask is None:
+        if b_start is not None:
+            x = K.concatenate([x[:, :1, :] + b_start, x[:, 1:, :]], axis=1)  # dim_1 is T
+        if b_end is not None:
+            x = K.concatenate([x[:, :-1, :], x[:, -1:, :] + b_end], axis=1)
+    else:
+        mask = K.cast(mask, K.floatx())
+        mask = K.expand_dims(mask, 2)
+        x *= mask
+        if b_start is not None:
+            mask_r = K.concatenate([K.zeros_like(mask[:, :1]), mask[:, :-1]], axis=1)
+            start_mask = K.cast(K.greater(mask, mask_r), K.floatx())
+            x = x + start_mask * b_start
+        if b_end is not None:
+            mask_l = K.concatenate([mask[:, 1:], K.zeros_like(mask[:, -1:])], axis=1)
+            end_mask = K.cast(K.greater(mask, mask_l), K.floatx())
+            x = x + end_mask * b_end
+    return x
+
+
+def viterbi_decode(x, U, b_start=None, b_end=None, mask=None):
+    """Computes the best tag sequence y for a given input x, i.e. the one that
+    maximizes the value of path_energy."""
+    x = add_boundary_energy(x, b_start, b_end, mask)
+
+    alpha_0 = x[:, 0, :]  # (B, N)
+    gamma_0 = K.zeros_like(alpha_0)
+    initial_states = [gamma_0, alpha_0]
+    # the reduce_step lambda below returns [idx_max, value_max] at each step
+    _, gamma = _forward(x,
+                        lambda B: [K.cast(K.argmax(B, axis=1), K.floatx()), K.max(B, axis=1)],
+                        initial_states,
+                        U,
+                        mask)
+    # gamma: (B, T, N)
+    y = _backward(gamma, mask)
+    return y
+
+
+def free_energy(x, U, b_start=None, b_end=None, mask=None):
+    """Efficiently computes the free energy of input x, i.e. the log-sum-exp of
+    the energies of all possible tag sequences y."""
+    x = add_boundary_energy(x, b_start, b_end, mask)
+    return free_energy0(x, U, mask)
+
+
+def free_energy0(x, U, mask=None):
+    """Free energy without boundary potential handling.
+    x: (B, T, N)
+    U: (N, N)
+    """
+    initial_states = [x[:, 0, :]]  # (B, N)
+    last_alpha, _ = _forward(x,
+                             lambda B: [K.logsumexp(B, axis=1)],
+                             initial_states,
+                             U,
+                             mask)
+    return last_alpha[:, 0]
+
+
+def _forward(x, reduce_step, initial_states, U, mask=None):
+    """Forward recurrence of the linear chain crf."""
+
+    def _forward_step(energy_matrix_t, states):  # (B, N, N), [(N,), (B, N)]
+        alpha_tm1 = states[-1]
+        new_states = reduce_step(K.expand_dims(alpha_tm1, 2) + energy_matrix_t)
+        return new_states[0], new_states
+
+    U_shared = K.expand_dims(K.expand_dims(U, 0), 0)  # (N, N) -> (1, 1, N, N)
+
+    if mask is not None:
+        mask = K.cast(mask, K.floatx())
+        mask_U = K.expand_dims(K.expand_dims(mask[:, :-1] * mask[:, 1:], 2), 3)
+        U_shared = U_shared * mask_U
+
+    inputs = K.expand_dims(x[:, 1:, :], 2) + U_shared  # (B, T-1, 1, N) + (1, 1, N, N) -> (B, T-1, N, N)
+    inputs = K.concatenate([inputs, K.zeros_like(inputs[:, -1:, :, :])], axis=1)  # (B, T-1, N, N) -> (B, T, N, N)
+
+    last, values, _ = K.rnn(_forward_step, inputs, initial_states)
+    return last, values
+
+
+def batch_gather(reference, indices):
+    """
+
+    :param reference: (B, N)
+    :param indices: (B,)
+    :return:
+    """
+    ref_shape = K.shape(reference)
+    batch_size = ref_shape[0]
+    n_classes = ref_shape[1]
+    flat_indices = K.arange(0, batch_size) * n_classes + K.flatten(indices)
+    return K.gather(K.flatten(reference), flat_indices)
+
+
+def _backward(gamma, mask):
+    """Backward recurrence of the linear chain crf."""
+    gamma = K.cast(gamma, 'int32')  # (B, T, N)
+
+    def _backward_step(gamma_t, states):
+        y_tm1 = states[0]
+        y_t = batch_gather(gamma_t, y_tm1)
+        return y_t, [y_t]
+
+    initial_states = [K.zeros_like(gamma[:, 0, 0])]  # (B,)
+    _, y_rev, _ = K.rnn(_backward_step,
+                        gamma,
+                        initial_states,
+                        go_backwards=True)
+    y = K.reverse(y_rev, 1)
+
+    if mask is not None:
+        mask = K.cast(mask, dtype='int32')
+        # mask output
+        y *= mask
+        # set masked values to -1
+        y += -(1 - mask)
+    return y
+
+#--------- ChainCRF() -------------------#
+def CRF_forward(observations, transitions):
+    """
+    Batched CRF forward (alpha) recurrence.
+    :param observations: (B, T, N)
+    :param transitions: (N, N)
+    :return: alpha, (B, T-1, N)
+    """
+    U = transitions.dimshuffle('x', 'x', 0, 1)  # (N, N) -> (1, 1, N, N)
+    initial = observations[:, 0, :]             # (B, N)
+    initial = tensor.unbroadcast(initial, 0, 1)
+    x = observations[:, 1:, :]                  # (B, T-1, N)
+    x = x.dimshuffle(0, 1, 'x', 2)              # (B, T-1, N) -> (B, T-1, 1, N)
+    x = x + U
+    # x = tensor.concatenate([x, tensor.zeros_like(x[:, -1:, :, :])], axis=1)  # (B, T-1, N, N) -> (B, T, N, N)
+    x = x.dimshuffle(1, 0, 2, 3)                # (T-1, B, N, N)
+
+    def recurrence(energy_matrix_t, states):
+        """
+        :param energy_matrix_t: (B, N, N)
+        :return: new_states, (B, N)
+        """
+        alpha_tm1 = states                           # (B, N)
+        alpha_tm1 = alpha_tm1.dimshuffle(0, 1, 'x')  # (B, N, 1)
+        x = alpha_tm1 + energy_matrix_t              # (B, N, N)
+        new_states = log_sum_exp(x, axis=1)          # (B, N)
+        return new_states
+
+    # alpha: (T-1, B, N)
+    alpha, _ = theano.scan(
+        fn=recurrence,
+        sequences=x,
+        outputs_info=[initial]
+    )
+    return alpha.dimshuffle(1, 0, 2)  # (B, T-1, N)
+
+def CRF_decode(observations, transitions):
+    """
+    Batched Viterbi decoding.
+    :param observations: (B, T, N)
+    :param transitions: (N, N)
+    :return: best tag sequence, (B, T)
+    """
+    alpha_0 = observations[:, 0, :]  # (B, N)
+    gamma_0 = tensor.zeros_like(alpha_0, dtype='int64')
+
+    U = transitions.dimshuffle('x', 'x', 0, 1)  # (N, N) -> (1, 1, N, N)
+    x = observations[:, 1:, :]                  # (B, T-1, N)
+    x = x.dimshuffle(0, 1, 'x', 2)              # (B, T-1, N) -> (B, T-1, 1, N)
+    x = x + U                                   # (B, T-1, N, N)
+    x = tensor.concatenate([x, tensor.zeros_like(x[:, -1:, :, :])], axis=1)  # (B, T-1, N, N) -> (B, T, N, N)
+    x = x.dimshuffle(1, 0, 2, 3)                # (T, B, N, N)
+
+    def recurrence(energy_matrix_t, index_tm1, score_tm1):
+        """
+        :param energy_matrix_t: (B, N, N)
+        :return: (index, score), each (B, N)
+        """
+        score_tm1 = score_tm1.dimshuffle(0, 1, 'x')  # (B, N, 1)
+        x = score_tm1 + energy_matrix_t              # (B, N, N)
+        index = tensor.argmax(x, axis=1)             # (B, N)
+        score = tensor.max(x, axis=1)                # (B, N)
+        return index, score
+
+    # gamma, alpha: (T, B, N)
+    result, _ = theano.scan(
+        fn=recurrence,
+        sequences=x,
+        outputs_info=[gamma_0, alpha_0]
+    )
+    gamma, alpha = result
+
+    def backward_step(gamma_t, y_tm1):
+        y_t = batch_gather(gamma_t, y_tm1)
+        return y_t
+
+    T, B, N = gamma.shape
+    initial = tensor.zeros(shape=(B,), dtype='int64')
+    # y: (T, B)
+    y, _ = theano.scan(
+        fn=backward_step,
+        sequences=gamma,
+        outputs_info=[initial],
+        go_backwards=True
+    )
+    y = y.dimshuffle(1, 0)
+    y = y[:, ::-1]
+    y = tensor.cast(y, 'int32')
+    return y  # (B, T)
+
+#--------- ChainCRF_Lample() ------------#
+def CRF_forward_nobatch(observations, transitions, viterbi=False, return_alpha=False, return_best_sequence=False):
+    """
+    Takes as input:
+        [DV] (T+2, N+2), (N+2, N+2)
+        - observations, sequence of shape (n_steps, n_classes)
+        - transitions, sequence of shape (n_classes, n_classes)
+    Probabilities must be given in the log space.
+ Compute alpha, matrix of size (n_steps, n_classes), such that + alpha[i, j] represents one of these 2 values: + - the probability that the real path at node i ends in j + - the maximum probability of a path finishing in j at node i (Viterbi) + Returns one of these 2 values: + - alpha + - the final probability, which can be: + - the sum of the probabilities of all paths + - the probability of the best path (Viterbi) + """ + assert not return_best_sequence or (viterbi and not return_alpha) + + def recurrence(obs, previous, transitions): + """ + + :param obs: (N+2,) + :param previous: (N+2,) + :param transitions: (N+2, N+2) + :return: + """ + previous = previous.dimshuffle(0, 'x') # (N+2,) -> (N+2, 1) + obs = obs.dimshuffle('x', 0) # (N+2,) -> (1, N+2) + if viterbi: + scores = previous + obs + transitions + out = scores.max(axis=0) + if return_best_sequence: + out2 = scores.argmax(axis=0) + return out, out2 + else: + return out + else: + return log_sum_exp(previous + obs + transitions, axis=0) + + initial = observations[0] # = b_s = [[small] * T, 0, small] + alpha, _ = theano.scan( + fn=recurrence, + outputs_info=(initial, None) if return_best_sequence else initial, + sequences=[observations[1:]], + non_sequences=transitions + ) + + if return_alpha: + return alpha + elif return_best_sequence: + sequence, _ = theano.scan( + fn=lambda beta_i, previous: beta_i[previous], + outputs_info=tensor.cast(tensor.argmax(alpha[0][-1]), 'int32'), + sequences=tensor.cast(alpha[1][::-1], 'int32') + ) + sequence = tensor.concatenate([sequence[::-1], [tensor.argmax(alpha[0][-1])]]) + return sequence + else: + if viterbi: + return alpha[-1].max(axis=0) + else: + return log_sum_exp(alpha[-1], + axis=0) # p(x). Here alpha is equivalent to beta in "CRF as NN Layer", Page10. 
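+
+# --- illustrative helper (added for clarity; not exercised by the test below) ---
+# np_crf_forward() is a plain-numpy, single-sequence re-implementation of the alpha
+# (log-sum-exp) recurrence used by CRF_forward()/free_energy0() above, handy for
+# sanity-checking the Theano functions by hand. The helper name is ours, not part
+# of dandelion or any library API.
+def np_crf_forward(obs, trans):
+    """obs: (T, N) emission scores, trans: (N, N) transition scores, in log space.
+    Returns log(Z), the free energy of the sequence."""
+    alpha = obs[0]                                          # (N,)
+    for t in range(1, obs.shape[0]):
+        scores = alpha[:, None] + trans + obs[t][None, :]   # scores[i, j] = alpha[i] + U[i, j] + obs[t, j]
+        m = scores.max(axis=0)
+        alpha = m + np.log(np.exp(scores - m).sum(axis=0))  # stable log-sum-exp over previous tag i
+    m = alpha.max()
+    return m + np.log(np.exp(alpha - m).sum())              # log(Z), a scalar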
+ + + +if __name__ == '__main__': + + if 0: + y = tensor.imatrix('y') + x = tensor.ftensor3('x') + U = tensor.fmatrix('U') + x_nobatch = tensor.fmatrix('x_nobatch') + + cost1 = free_energy0(x, U) + cost2 = CRF_forward(x, U) + cost3 = CRF_forward_nobatch(x_nobatch, U) + seq1 = viterbi_decode(x, U) + seq2 = CRF_decode(x, U) + print('compiling f1 & f2 ...') + f1 = theano.function([x,U], cost1) + f2 = theano.function([x,U], cost2) + print('compiling f3 ...') + f3 = theano.function([x,U], seq1) + print('compiling f4 ...') + f4 = theano.function([x,U], seq2) + print('compiling f5 ...') + f5 = theano.function([x_nobatch, U], cost3) + + B, T, N = 7, 100, 20 + for i in range(1): + x = np.random.rand(B, T, N).astype(np.float32) + U = np.random.rand(N, N).astype(np.float32) + + c1 = f1(x,U) + c2 = f2(x,U) + s1 = f3(x, U) + s2 = f4(x, U) + print(c1) + print(c2) + print(c1==c2) + if np.all(c1==c2): + print('c pass') + else: + raise ValueError('c not same!') + + print(s1) + print(s2) + print(s1.shape) + print(s2.shape) + print(s1==s2) + if np.all(s1==s2): + print('s pass') + else: + raise ValueError('s not same!') + + if 1: + y = tensor.imatrix('y') + x = tensor.ftensor3('x') + U = tensor.fmatrix('U') + x_nobatch = tensor.fmatrix('x_nobatch') + + cost2 = CRF_forward(x, U) + cost3 = CRF_forward_nobatch(x_nobatch, U, return_alpha=True) + seq2 = CRF_decode(x, U) + seq3 = CRF_forward_nobatch(x_nobatch, U, viterbi=True, return_best_sequence=True) + print('compiling f2 ...') + f2 = theano.function([x, U], cost2) + print('compiling f4 ...') + f4 = theano.function([x, U], seq2) + print('compiling f5 ...') + f5 = theano.function([x_nobatch, U], cost3) + print('compiling f6 ...') + f6 = theano.function([x_nobatch, U], seq3) + + B, T, N = 2, 5, 3 + for i in range(10): + x = np.random.rand(B, T, N).astype(np.float32) + U = np.random.rand(N, N).astype(np.float32) + + c2 = f2(x, U) + s2 = f4(x, U) + + c3_list = [] + s3_list = [] + for j in range(B): + c3_nobatch = f5(x[j,:,:], U) + s3_nobatch = f6(x[j, :, :], U) + c3_list.append(np.expand_dims(c3_nobatch, 0)) + s3_list.append(np.expand_dims(s3_nobatch, 0)) + c3 = np.concatenate(c3_list, axis=0) + s3 = np.concatenate(s3_list, axis=0) + + # if np.all(c2 == c3): + # print('c pass') + # else: + print(c2) + print(c3) + # raise ValueError('c not same!') + + print(s2) + print(s3) + if np.all(s2 == s3): + print('s pass') + else: + + raise ValueError('s not same!') + + + + diff --git a/Test/Test_Conv2D.py b/Test/Test_Conv2D.py new file mode 100644 index 0000000..b6b9212 --- /dev/null +++ b/Test/Test_Conv2D.py @@ -0,0 +1,87 @@ +# coding:utf-8 +# Test for Conv2D class +# Created : 1, 31, 2018 +# Revised : 1, 31, 2018 +# All rights reserved +#------------------------------------------------------------------------------------------------ +__author__ = 'dawei.leng' +import os, sys, psutil +os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'" + +import theano +from theano import tensor +from dandelion.module import * +from dandelion.activation import * +from lasagne.layers import InputLayer, Conv2DLayer, get_output +import lasagne.nonlinearities as LACT +import dandelion +dandelion_path = os.path.split(dandelion.__file__)[0] +print('dandelion path = %s\n' % dandelion_path) + +class build_model_D(Module): + def __init__(self, in_channel=3, out_channel=3, kernel_size=(3,3), stride=(1,1), pad='valid', dilation=(1,1), num_groups=1): + super().__init__() + self.conv2d = Conv2D(in_channels=in_channel, out_channels=out_channel, kernel_size=kernel_size, 
stride=stride, + pad=pad, dilation=dilation, num_groups=num_groups) + self.predict = self.forward + + def forward(self, x): + """ + + :param x: (B, C, H, W) + :return: + """ + x = self.conv2d.forward(x) + x = relu(x) + return x + +def build_model_L(in_channel=3, out_channel=3, kernel_size=(3,3), stride=(1,1), pad='valid', dilation=(1,1), num_groups=1): + input_var = tensor.ftensor4('x') # (B, C, H, W) + input0 = InputLayer(shape=(None, in_channel, None, None), input_var=input_var, name='input0') + conv0 = Conv2DLayer(input0, num_filters=out_channel, filter_size=kernel_size, stride=stride, pad=pad, nonlinearity=LACT.rectify, + name='conv0') + return conv0 + + +if __name__ == '__main__': + import numpy as np + from lasagne_ext.utils import get_layer_by_name + + in_channel = 3; out_channel = 3;kernel_size = (3, 3); stride = (1, 1); pad = 'valid';dilation = (1,1);num_groups = 1 + model_D = build_model_D(in_channel=in_channel, out_channel=out_channel, kernel_size=kernel_size, stride=stride, + pad=pad, dilation=dilation, num_groups=num_groups) + model_L = build_model_L(in_channel=in_channel, out_channel=out_channel, kernel_size=kernel_size, stride=stride, + pad=pad) + + W = np.random.rand(out_channel, in_channel, kernel_size[0], kernel_size[1]).astype(np.float32) + b = np.random.rand(out_channel).astype(np.float32) + + model_D.conv2d.W.set_value(W) + model_D.conv2d.b.set_value(b) + + conv_L = get_layer_by_name(model_L, 'conv0') + conv_L.W.set_value(W) + conv_L.b.set_value(b) + + X = get_layer_by_name(model_L, 'input0').input_var + y_D = model_D.forward(X) + y_L = get_output(model_L) + + fn_D = theano.function([X], y_D, no_default_updates=True, on_unused_input='ignore') + fn_L = theano.function([X], y_L, no_default_updates=True, on_unused_input='ignore') + + for i in range(20): + x = np.random.rand(3, in_channel, 32, 32).astype(np.float32) - 0.5 + y_D = fn_D(x) + y_L = fn_L(x) + diff = np.max(np.abs(y_D - y_L)) + print('i=%d, diff=%0.6f' % (i, diff)) + if diff>1e-4: + print('y_D=\n', y_D) + print('y_L=\n', y_L) + raise ValueError('diff is too big') + + print('Test passed') + + + diff --git a/Test/Test_ConvTransposed2D.py b/Test/Test_ConvTransposed2D.py new file mode 100644 index 0000000..4faa0cc --- /dev/null +++ b/Test/Test_ConvTransposed2D.py @@ -0,0 +1,87 @@ +# coding:utf-8 +# Test for ConvTransposed2D class +# Created : 3, 2, 2018 +# Revised : 3, 2, 2018 +# All rights reserved +#------------------------------------------------------------------------------------------------ +__author__ = 'dawei.leng' +import os, sys, psutil +os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'" + +import theano +from theano import tensor +from dandelion.module import * +from dandelion.activation import * +from lasagne.layers import InputLayer, TransposedConv2DLayer, get_output +import lasagne.nonlinearities as LACT +import dandelion +dandelion_path = os.path.split(dandelion.__file__)[0] +print('dandelion path = %s\n' % dandelion_path) + +class build_model_D(Module): + def __init__(self, in_channel=3, out_channel=3, kernel_size=(3,3), stride=(1,1), pad='valid', dilation=(1,1), num_groups=1): + super().__init__() + self.tconv2d = ConvTransposed2D(in_channels=in_channel, out_channels=out_channel, kernel_size=kernel_size, stride=stride, + pad=pad, dilation=dilation, num_groups=num_groups) + self.predict = self.forward + + def forward(self, x): + """ + + :param x: (B, C, H, W) + :return: + """ + x = self.tconv2d.forward(x) + # x = relu(x) + return x + +def build_model_L(in_channel=3, 
out_channel=3, kernel_size=(3,3), stride=(1,1), pad='valid', dilation=(1,1), num_groups=1): + input_var = tensor.ftensor4('x') # (B, C, H, W) + input0 = InputLayer(shape=(None, in_channel, None, None), input_var=input_var, name='input0') + tconv0 = TransposedConv2DLayer(input0, num_filters=out_channel, filter_size=kernel_size, stride=stride, crop=pad, nonlinearity=LACT.linear, + name='tconv0') + return tconv0 + + +if __name__ == '__main__': + import numpy as np + from lasagne_ext.utils import get_layer_by_name + + in_channel = 3; out_channel = 3;kernel_size = (3, 3); stride = (1, 1); pad = 'valid';dilation = (1,1);num_groups = 1 + model_D = build_model_D(in_channel=in_channel, out_channel=out_channel, kernel_size=kernel_size, stride=stride, + pad=pad, dilation=dilation, num_groups=num_groups) + model_L = build_model_L(in_channel=in_channel, out_channel=out_channel, kernel_size=kernel_size, stride=stride, + pad=pad) + + W = np.random.rand(out_channel, in_channel, kernel_size[0], kernel_size[1]).astype(np.float32) + b = np.random.rand(out_channel).astype(np.float32) + + model_D.tconv2d.W.set_value(W) + model_D.tconv2d.b.set_value(b) + + conv_L = get_layer_by_name(model_L, 'tconv0') + conv_L.W.set_value(W) + conv_L.b.set_value(b) + + X = get_layer_by_name(model_L, 'input0').input_var + y_D = model_D.forward(X) + y_L = get_output(model_L) + + fn_D = theano.function([X], y_D, no_default_updates=True, on_unused_input='ignore') + fn_L = theano.function([X], y_L, no_default_updates=True, on_unused_input='ignore') + + for i in range(20): + x = np.random.rand(8, in_channel, 33, 32).astype(np.float32) - 0.5 + y_D = fn_D(x) + y_L = fn_L(x) + diff = np.max(np.abs(y_D - y_L)) + print('i=%d, diff=%0.6f' % (i, diff)) + if diff>1e-4: + print('y_D=\n', y_D) + print('y_L=\n', y_L) + raise ValueError('diff is too big') + + print('Test passed') + + + diff --git a/Test/Test_Dense.py b/Test/Test_Dense.py new file mode 100644 index 0000000..075ce39 --- /dev/null +++ b/Test/Test_Dense.py @@ -0,0 +1,72 @@ +# coding:utf-8 +# Unit test for Dense class +# Created : 1, 30, 2018 +# Revised : 1, 30, 2018 +# All rights reserved +#------------------------------------------------------------------------------------------------ +__author__ = 'dawei.leng' +import os, sys, psutil +os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'" + +import theano +from theano import tensor +from dandelion.module import * +from dandelion.activation import * +from lasagne.layers import InputLayer, DenseLayer, get_output +import lasagne.nonlinearities as LACT + +class build_model_D(Module): + def __init__(self, in_dim=3, out_dim=3): + super().__init__() + self.in_dim = in_dim + self.out_dim = out_dim + self.dense = Dense(input_dims=self.in_dim, output_dim=self.out_dim) + self.predict = self.forward + + def forward(self, x): + x = self.dense.forward(x) + x = softmax(x) + return x + +def build_model_L(in_dim=3, out_dim=3): + input_var = tensor.fmatrix('x') + input0 = InputLayer(shape=(None, in_dim), input_var=input_var, name='input0') + dense0 = DenseLayer(input0, num_units=out_dim, nonlinearity=LACT.softmax, name='dense0') + return dense0 + + +if __name__ == '__main__': + import numpy as np + from lasagne_ext.utils import get_layer_by_name + + in_dim, out_dim = 500, 16 + model_D = build_model_D(in_dim=in_dim, out_dim=out_dim) + model_L = build_model_L(in_dim=in_dim, out_dim=out_dim) + + W = np.random.rand(in_dim, out_dim).astype(np.float32) + b = np.random.rand(out_dim).astype(np.float32) + 
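# Both implementations get one shared set of random parameters below, so their
+    # outputs must agree: each computes y = softmax(x.dot(W) + b) for the same (W, b).
+    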
model_D.dense.W.set_value(W) + model_D.dense.b.set_value(b) + get_layer_by_name(model_L, 'dense0').W.set_value(W) + get_layer_by_name(model_L, 'dense0').b.set_value(b) + + X = get_layer_by_name(model_L, 'input0').input_var + y_D = model_D.forward(X) + y_L = get_output(model_L) + + fn_D = theano.function([X], y_D, no_default_updates=True) + fn_L = theano.function([X], y_L, no_default_updates=True) + + for i in range(20): + x = np.random.rand(16, in_dim).astype(np.float32) + y_D = fn_D(x) + y_L = fn_L(x) + diff = np.sum(np.abs(y_D - y_L)) + print('i=%d, diff=%0.6f' % (i, diff)) + if diff>1e-4: + raise ValueError('diff is too big') + + print('Test passed') + + + diff --git a/Test/Test_GRU.py b/Test/Test_GRU.py new file mode 100644 index 0000000..64cc5c9 --- /dev/null +++ b/Test/Test_GRU.py @@ -0,0 +1,102 @@ +# coding:utf-8 +# Unit test for GRU class +# Created : 1, 31, 2018 +# Revised : 1, 31, 2018 +# All rights reserved +#------------------------------------------------------------------------------------------------ +__author__ = 'dawei.leng' +import os, sys, psutil +os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'" + +import theano +from theano import tensor +from dandelion.module import * +from dandelion.activation import * +from lasagne.layers import InputLayer, GRULayer, get_output +import lasagne.nonlinearities as LACT + +class build_model_D(Module): + def __init__(self, in_dim=3, out_dim=3): + super().__init__() + self.in_dim = in_dim + self.out_dim = out_dim + self.gru = GRU(input_dims=self.in_dim, hidden_dim=self.out_dim) + self.predict = self.forward + + def forward(self, x): + """ + + :param x: (B, T, D) + :return: + """ + x = x.dimshuffle((1, 0, 2)) # ->(T, B, D) + x = self.gru.forward(x, backward=False, only_return_final=False) + x = x.dimshuffle((1, 0, 2)) # ->(B, T, D) + # x = tanh(x) + return x + +def build_model_L(in_dim=3, out_dim=3): + input_var = tensor.ftensor3('x') # (B, T, D) + input0 = InputLayer(shape=(None, None, in_dim), input_var=input_var, name='input0') + gru0 = GRULayer(input0, num_units=out_dim, precompute_input=True, + backwards=False, only_return_final=False, learn_init=True, + name='gru0') + return gru0 + + +if __name__ == '__main__': + import numpy as np + from lasagne_ext.utils import get_layer_by_name + + in_dim, out_dim = 6, 5 + model_D = build_model_D(in_dim=in_dim, out_dim=out_dim) + model_L = build_model_L(in_dim=in_dim, out_dim=out_dim) + + W_in = np.random.rand(in_dim, 3*out_dim).astype(np.float32) + b_in = np.random.rand(3*out_dim).astype(np.float32) + W_hid = np.random.rand(out_dim, 3*out_dim).astype(np.float32) + h_ini = np.random.rand(out_dim).astype(np.float32) + + model_D.gru.W_in.set_value(W_in) + model_D.gru.b_in.set_value(b_in) + model_D.gru.W_hid.set_value(W_hid) + model_D.gru.h_ini.set_value(h_ini) + + gru_L = get_layer_by_name(model_L, 'gru0') + gru_L.W_in_to_resetgate.set_value(W_in[:, :out_dim]) + gru_L.W_in_to_updategate.set_value(W_in[:, out_dim:2*out_dim]) + gru_L.W_in_to_hidden_update.set_value(W_in[:, 2*out_dim:3*out_dim]) + + gru_L.W_hid_to_resetgate.set_value(W_hid[:, :out_dim]) + gru_L.W_hid_to_updategate.set_value(W_hid[:, out_dim:2*out_dim]) + gru_L.W_hid_to_hidden_update.set_value(W_hid[:, 2*out_dim:3*out_dim]) + + gru_L.b_resetgate.set_value(b_in[:out_dim]) + gru_L.b_updategate.set_value(b_in[out_dim:2*out_dim]) + gru_L.b_hidden_update.set_value(b_in[2*out_dim:3*out_dim]) + + gru_L.hid_init.set_value(h_ini.reshape((1, out_dim))) + + + X = get_layer_by_name(model_L, 'input0').input_var + y_D 
= model_D.forward(X) + y_L = get_output(model_L) + + fn_D = theano.function([X], y_D, no_default_updates=True, on_unused_input='ignore') + fn_L = theano.function([X], y_L, no_default_updates=True, on_unused_input='ignore') + + for i in range(20): + x = np.random.rand(2, 5, in_dim).astype(np.float32) - 0.5 + y_D = fn_D(x) + y_L = fn_L(x) + diff = np.max(np.abs(y_D - y_L)) + print('i=%d, diff=%0.6f' % (i, diff)) + if diff>1e-4: + print('y_D=\n', y_D) + print('y_L=\n', y_L) + raise ValueError('diff is too big') + + print('Test passed') + + + diff --git a/Test/Test_LSTM.py b/Test/Test_LSTM.py new file mode 100644 index 0000000..316c196 --- /dev/null +++ b/Test/Test_LSTM.py @@ -0,0 +1,117 @@ +# coding:utf-8 +# Unit test for LSTM class +# Created : 1, 30, 2018 +# Revised : 1, 30, 2018 +# All rights reserved +#------------------------------------------------------------------------------------------------ +__author__ = 'dawei.leng' +import os, sys, psutil +os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'" + +import theano +from theano import tensor +from dandelion.module import * +from dandelion.activation import * +from lasagne.layers import InputLayer, DenseLayer, LSTMLayer, get_output, Upscale2DLayer, TransposedConv2DLayer +import lasagne.nonlinearities as LACT + +class build_model_D(Module): + def __init__(self, in_dim=3, out_dim=3): + super().__init__() + self.in_dim = in_dim + self.out_dim = out_dim + self.lstm = LSTM(input_dims=self.in_dim, hidden_dim=self.out_dim, peephole=True) + self.predict = self.forward + + def forward(self, x): + """ + + :param x: (B, T, D) + :return: + """ + x = x.dimshuffle((1, 0, 2)) # ->(T, B, D) + x = self.lstm.forward(x, backward=True, only_return_final=True) + # x = x.dimshuffle((1, 0, 2)) # ->(B, T, D) + # x = tanh(x) + return x + +def build_model_L(in_dim=3, out_dim=3): + input_var = tensor.ftensor3('x') # (B, T, D) + input0 = InputLayer(shape=(None, None, in_dim), input_var=input_var, name='input0') + lstm0 = LSTMLayer(input0, num_units=out_dim, precompute_input=True, nonlinearity=LACT.tanh, + backwards=True, only_return_final=True, learn_init=True, consume_less='None', + name='lstm0') + return lstm0 + + +if __name__ == '__main__': + import numpy as np + from lasagne_ext.utils import get_layer_by_name + + in_dim, out_dim = 32, 3 + model_D = build_model_D(in_dim=in_dim, out_dim=out_dim) + model_L = build_model_L(in_dim=in_dim, out_dim=out_dim) + + W_in = np.random.rand(in_dim, 4*out_dim).astype(np.float32) + b_in = np.random.rand(4*out_dim).astype(np.float32) + W_hid = np.random.rand(out_dim, 4*out_dim).astype(np.float32) + h_ini = np.random.rand(out_dim).astype(np.float32) + c_ini = np.random.rand(out_dim).astype(np.float32) + w_cell_to_igate = np.random.rand(out_dim).astype(np.float32) + w_cell_to_fgate = np.random.rand(out_dim).astype(np.float32) + w_cell_to_ogate = np.random.rand(out_dim).astype(np.float32) + + model_D.lstm.W_in.set_value(W_in) + model_D.lstm.b_in.set_value(b_in) + model_D.lstm.W_hid.set_value(W_hid) + model_D.lstm.h_ini.set_value(h_ini) + model_D.lstm.c_ini.set_value(c_ini) + model_D.lstm.w_cell_to_igate.set_value(w_cell_to_igate) + model_D.lstm.w_cell_to_fgate.set_value(w_cell_to_fgate) + model_D.lstm.w_cell_to_ogate.set_value(w_cell_to_ogate) + + lstm_L = get_layer_by_name(model_L, 'lstm0') + lstm_L.W_in_to_ingate.set_value(W_in[:, :out_dim]) + lstm_L.W_in_to_forgetgate.set_value(W_in[:, out_dim:2*out_dim]) + lstm_L.W_in_to_cell.set_value(W_in[:, 2*out_dim:3*out_dim]) + 
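# note: the slices here assume dandelion packs the LSTM gate parameters along
+    # axis 1 in the order [ingate | forgetgate | cell | outgate]; the W_hid and
+    # b_in slices below assume the same packed layout.
+    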
lstm_L.W_in_to_outgate.set_value(W_in[:, 3*out_dim:])
+
+    lstm_L.W_hid_to_ingate.set_value(W_hid[:, :out_dim])
+    lstm_L.W_hid_to_forgetgate.set_value(W_hid[:, out_dim:2*out_dim])
+    lstm_L.W_hid_to_cell.set_value(W_hid[:, 2*out_dim:3*out_dim])
+    lstm_L.W_hid_to_outgate.set_value(W_hid[:, 3*out_dim:])
+
+    lstm_L.b_ingate.set_value(b_in[:out_dim])
+    lstm_L.b_forgetgate.set_value(b_in[out_dim:2*out_dim])
+    lstm_L.b_cell.set_value(b_in[2*out_dim:3*out_dim])
+    lstm_L.b_outgate.set_value(b_in[3*out_dim:])
+
+    lstm_L.hid_init.set_value(h_ini.reshape((1, out_dim)))
+    lstm_L.cell_init.set_value(c_ini.reshape((1, out_dim)))
+
+    lstm_L.W_cell_to_ingate.set_value(w_cell_to_igate)
+    lstm_L.W_cell_to_forgetgate.set_value(w_cell_to_fgate)
+    lstm_L.W_cell_to_outgate.set_value(w_cell_to_ogate)
+
+    X = get_layer_by_name(model_L, 'input0').input_var
+    y_D = model_D.forward(X)
+    y_L = get_output(model_L)
+
+    fn_D = theano.function([X], y_D, no_default_updates=True, on_unused_input='ignore')
+    fn_L = theano.function([X], y_L, no_default_updates=True, on_unused_input='ignore')
+
+    for i in range(20):
+        x = np.random.rand(4, 16, in_dim).astype(np.float32)
+        y_D = fn_D(x)
+        y_L = fn_L(x)
+        diff = np.max(np.abs(y_D - y_L))
+        print('i=%d, diff=%0.6f' % (i, diff))
+        if diff>1e-4:
+            print('y_D=\n', y_D)
+            print('y_L=\n', y_L)
+            raise ValueError('diff is too big')
+
+    print('Test passed')
+
+
+
diff --git a/Test/Test_Unet.py b/Test/Test_Unet.py
new file mode 100644
index 0000000..4e63011
--- /dev/null
+++ b/Test/Test_Unet.py
@@ -0,0 +1,37 @@
+# coding:utf-8
+# Partial test for U-net.
+# Created : 5, 25, 2018
+# Revised : 5, 25, 2018
+# All rights reserved
+#------------------------------------------------------------------------------------------------
+__author__ = 'dawei.leng'
+import os, sys, psutil
+import numpy as np
+os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'"
+
+import theano
+from theano import tensor
+from dandelion.module import *
+from dandelion.activation import *
+from dandelion.model.unet import model_Unet
+
+import dandelion
+dandelion_path = os.path.split(dandelion.__file__)[0]
+print('dandelion path = %s\n' % dandelion_path)
+
+if __name__ == '__main__':
+    im_height, im_width = 65, 63
+    model = model_Unet(im_height=im_height, im_width=im_width)
+    x = tensor.ftensor4('x')
+    y = model.forward(x)
+    print('compiling fn...')
+    fn = theano.function([x], y, no_default_updates=False)
+    print('run fn...')
+    input = np.random.rand(7, 1, im_height, im_width).astype(np.float32)
+    output = fn(input)
+    print(output)
+    print(output.shape)
+
+    print('Test passed')
+
+
+
diff --git a/Test/Test_pooling.py b/Test/Test_pooling.py
new file mode 100644
index 0000000..7ceee77
--- /dev/null
+++ b/Test/Test_pooling.py
@@ -0,0 +1,63 @@
+# coding:utf-8
+# Unit test for pooling functions
+# Created : 2, 27, 2018
+# Revised : 2, 27, 2018
+# All rights reserved
+#------------------------------------------------------------------------------------------------
+__author__ = 'dawei.leng'
+import os, sys, psutil
+os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'"
+
+import theano
+from theano import tensor
+from dandelion.module import *
+from dandelion.functional import *
+
+def pool_1d_Lasagne(x, axis=1, mode='max'):
+    """
+    Lasagne requires x to be 3D; pooling is done on the last dimension.
+    :param x:
+    :param axis:
+    :return:
+    """
+    input_4d = tensor.shape_padright(x, 1)
+    if axis == 1:
+        input_4d = input_4d.dimshuffle((0, 2, 1, 3))
+    pooled = pool_2d(input_4d,
+                     ws=(2, 1),
+                     
stride=(2, 1), + ignore_border=True, + pad=(0, 0), + mode=mode, + ) + if axis == 1: # [DV] add support for 'axis' para + pooled = pooled.dimshuffle((0, 2, 1, 3)) + return pooled[:, :, :, 0] + +if __name__ == '__main__': + import numpy as np + + x_3d = tensor.ftensor3('x') + y_3d_D = pool_1d(x_3d, axis=1) + + y_3d_L = pool_1d_Lasagne(x_3d, axis=1) + + fn_D = theano.function([x_3d], y_3d_D, no_default_updates=True, on_unused_input='ignore') + fn_L = theano.function([x_3d], y_3d_L, no_default_updates=True, on_unused_input='ignore') + + + for i in range(20): + x = np.random.rand(7, 117, 27).astype(np.float32) + y_D = fn_D(x) + y_L = fn_L(x) + diff = np.max(np.abs(y_D - y_L)) + print('i=%d, diff=%0.6f' % (i, diff)) + if diff>1e-4: + print('y_D=\n', y_D) + print('y_L=\n', y_L) + raise ValueError('diff is too big') + + print('Test passed') + + + diff --git a/Test/__init__.py b/Test/__init__.py new file mode 100644 index 0000000..a262ecb --- /dev/null +++ b/Test/__init__.py @@ -0,0 +1 @@ +from .. import dandelion diff --git a/Test/todo.txt b/Test/todo.txt new file mode 100644 index 0000000..d9fb193 --- /dev/null +++ b/Test/todo.txt @@ -0,0 +1 @@ +Build unit test interface for each Test_*.py \ No newline at end of file diff --git a/dandelion/__init__.py b/dandelion/__init__.py index c39b31c..42d3718 100644 --- a/dandelion/__init__.py +++ b/dandelion/__init__.py @@ -7,5 +7,5 @@ from . import functional from . import model -__version__ = "0.15.1" +__version__ = "0.15.2" __author__ = "David Leon (Dawei Leng)" diff --git a/dandelion/module.py b/dandelion/module.py index 07c3cc2..eb5ea17 100644 --- a/dandelion/module.py +++ b/dandelion/module.py @@ -978,6 +978,9 @@ def __init__(self, in_channels, out_channels, kernel_size=(3,3), stride=(1,1), p else: self.b = self.register_param(b, shape=[out_channels], name='b_TConv2D') + self.predict = self.forward # predict() is the same with forward() for this layer + + def forward(self, input): if self.pad[0] == 'same': border_mode = 'half' elif self.pad[0] == 'valid': @@ -986,20 +989,17 @@ def __init__(self, in_channels, out_channels, kernel_size=(3,3), stride=(1,1), p border_mode = 'full' else: border_mode = self.pad - self.convTOP = tensor.nnet.abstract_conv.AbstractConv2d_gradInputs(imshp=[None, self.out_channels, self.output_shape[0], self.output_shape[1]], + convTOP = tensor.nnet.abstract_conv.AbstractConv2d_gradInputs(imshp=[None, self.out_channels, self.output_shape[0], self.output_shape[1]], kshp=self.W_shape, border_mode=border_mode, subsample=self.stride, filter_flip=not self.flip_filters, filter_dilation=self.dilation, num_groups=self.num_groups) - self.predict = self.forward # predict() is the same with forward() for this layer - - def forward(self, input): + output_shape = self.output_shape if any(s is None for s in self.output_shape): B, C, H, W = input.shape - self.output_shape = tuple(conv_input_length(input, filter, stride, p) + output_shape = tuple(conv_input_length(input, filter, stride, p) for input, filter, stride, p in zip([H, W], self.kernel_size, self.stride, self.pad)) - - conved = self.convTOP(self.W, input, self.output_shape) + conved = convTOP(self.W, input, output_shape) if self.b is None: output = conved elif self.untie_bias: diff --git a/docs/history.md b/docs/history.md index ec01d69..d26cc88 100644 --- a/docs/history.md +++ b/docs/history.md @@ -1,5 +1,9 @@ # History +## version 0.15.2 [5-28-2018] +* **FIXED**: `convTOP` should be constructed each time the `forward()` function of `ConvTransposed2D` is called. 
+  (Reason: `forward()` now derives `output_shape` from the symbolic input when the static shape is unknown, so a `convTOP` op cached at construction time could otherwise reuse a stale shape.)
+
+
 ## version 0.15.1 [5-25-2018]
 * **NEW**: add `model` module into master branch of Dandelion
 * **NEW**: add U-net FCN implementation into `model` module