diff --git a/.gitignore b/.gitignore index be50232..a9ba243 100644 --- a/.gitignore +++ b/.gitignore @@ -6,5 +6,4 @@ Dandelion.egg-info dandelion/__pycache__ dandelion/model/__pycache__ Cython/*.whl -site -Test \ No newline at end of file +site \ No newline at end of file diff --git a/Test/Test_BatchNorm.py b/Test/Test_BatchNorm.py new file mode 100644 index 0000000..0cfa8c9 --- /dev/null +++ b/Test/Test_BatchNorm.py @@ -0,0 +1,92 @@ +# coding:utf-8 +# Test for BatchNorm class +# Created : 2, 27, 2018 +# Revised : 2, 27, 2018 +# All rights reserved +#------------------------------------------------------------------------------------------------ +__author__ = 'dawei.leng' +import os, sys, psutil +os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'" + +import theano +from theano import tensor +from dandelion.module import * +from dandelion.activation import * +from lasagne.layers import InputLayer, BatchNormLayer_DV, get_output, get_all_updates +import lasagne.nonlinearities as LACT +import dandelion +dandelion_path = os.path.split(dandelion.__file__)[0] +print('dandelion path = %s\n' % dandelion_path) + +class build_model_D(Module): + def __init__(self, input_shape=None, axes='auto'): + super().__init__() + self.input_shape = input_shape + self.axes = axes + self.bn = BatchNorm(input_shape=self.input_shape, axes=self.axes) + + def forward(self, x): + x = self.bn.forward(x) + return x + + def predict(self, x): + return self.bn.predict(x) + +def build_model_L(input_shape=None, axes='auto'): + input_var = tensor.ftensor4('x') + input0 = InputLayer(shape=input_shape, input_var=input_var, name='input0') + result = BatchNormLayer_DV(input0, axes=axes, name='bn0') + return result + +def fix_update_bcasts(updates): + for param, update in updates.items(): + if param.broadcastable != update.broadcastable: + updates[param] = tensor.patternbroadcast(update, param.broadcastable) + return updates + + +if __name__ == '__main__': + import numpy as np + from lasagne_ext.utils import get_layer_by_name + + B, C, H, W = 2, 1, 8, 8 + input_shape = (None, C, H, W) + axes = 'auto' + + model_D = build_model_D(input_shape=input_shape, axes=axes) + model_L = build_model_L(input_shape=input_shape, axes=axes) + + X = get_layer_by_name(model_L, 'input0').input_var + #--- predict ---# + if 0: + y_D = model_D.predict(X) + y_L = get_output(model_L, deterministic=True) + fn_L = theano.function([X], y_L, no_default_updates=True) + fn_D = theano.function([X], y_D, no_default_updates=True) + + #--- train ---# + if 1: + y_D = model_D.forward(X) + y_L = get_output(model_L, deterministic=False) + + update_L = fix_update_bcasts(get_all_updates(model_L)) + update_D = fix_update_bcasts(model_D.collect_self_updates()) + + fn_L = theano.function([X], y_L, updates=update_L, no_default_updates=True) + fn_D = theano.function([X], y_D, updates=update_D, no_default_updates=False) + # fn_L = theano.function([X], y_L, no_default_updates=True) + # fn_D = theano.function([X], y_D, no_default_updates=True) + + + for i in range(20): + x = np.random.rand(B, C, H, W).astype(np.float32) + y_D = fn_D(x) + y_L = fn_L(x) + diff = np.sum(np.abs(y_D - y_L)) + print('i=%d, diff=%0.6f' % (i, diff)) + if diff>1e-4: + print(y_D) + print(y_L) + raise ValueError('diff is too big') + + print('Test passed') \ No newline at end of file diff --git a/Test/Test_ChainCRF.py b/Test/Test_ChainCRF.py new file mode 100644 index 0000000..3fcf2f6 --- /dev/null +++ b/Test/Test_ChainCRF.py @@ -0,0 +1,484 @@ +# coding:utf-8 +# Test ChainCRF() 
class against anago's reference implementation
+# Created : 2, 12, 2018
+# Revised : 2, 12, 2018
+# All rights reserved
+#------------------------------------------------------------------------------------------------
+__author__ = 'dawei.leng'
+import os, sys, psutil
+os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'"
+
+from keras import backend as K
+from keras import initializers, regularizers, constraints
+from keras.engine import Layer, InputSpec
+import theano, numpy as np
+import theano.tensor as tensor
+
+def log_sum_exp(x, axis=None, keepdims=False):
+    """
+    Stable log of a sum of exponentials
+    """
+    x_max = tensor.max(x, axis=axis, keepdims=True)
+    z = tensor.log(tensor.sum(tensor.exp(x - x_max), axis=axis, keepdims=True)) + x_max
+    return z.sum(axis=axis, keepdims=keepdims)
+
+def path_energy(y, x, U, b_start=None, b_end=None, mask=None):
+    """Calculates the energy of a tag path y for a given input x (with mask),
+    transition energies U and boundary energies b_start, b_end."""
+    x = add_boundary_energy(x, b_start, b_end, mask)
+    return path_energy0(y, x, U, mask)
+
+
+def path_energy0(y, x, U, mask=None):
+    """Path energy without boundary potential handling."""
+    n_classes = K.shape(x)[2]            # x.shape = (B, T, N)
+    y_one_hot = K.one_hot(y, n_classes)  # convert integer 'y' to one-hot encoded 'y': (B, T) -> (B, T, N)
+
+    # Tag path energy
+    energy = K.sum(x * y_one_hot, 2)     # (B, T, N) -> (B, T)
+    energy = K.sum(energy, 1)            # (B, T) -> (B,)
+
+    # Transition energy
+    y_t = y[:, :-1]                      # y_t,     (B, T-1)
+    y_tp1 = y[:, 1:]                     # y_(t+1), (B, T-1)
+    U_flat = K.reshape(U, [-1])          # (N, N) -> (N*N,)
+    # Convert 2-dim indices (y_t, y_tp1) of U to 1-dim indices of U_flat:
+    flat_indices = y_t * n_classes + y_tp1
+    U_y_t_tp1 = K.gather(U_flat, flat_indices)
+
+    if mask is not None:
+        mask = K.cast(mask, K.floatx())
+        y_t_mask = mask[:, :-1]
+        y_tp1_mask = mask[:, 1:]
+        U_y_t_tp1 *= y_t_mask * y_tp1_mask
+
+    energy += K.sum(U_y_t_tp1, axis=1)
+
+    return energy                        # (B,)
+
+
+def sparse_chain_crf_loss(y, x, U, b_start=None, b_end=None, mask=None):
+    """Given the true sparsely encoded tag sequence y, input x (with mask),
+    transition energies U, boundary energies b_start and b_end, it computes
+    the loss function of a Linear Chain Conditional Random Field:
+    loss(y, x) = NLL(P(y|x)), where P(y|x) = exp(E(y, x)) / Z.
+    So, loss(y, x) = - E(y, x) + log(Z).
+    Here, E(y, x) is the tag path energy, and Z is the normalization constant.
+    The value log(Z) is also called the free energy.
+    """
+    x = add_boundary_energy(x, b_start, b_end, mask)
+    energy = path_energy0(y, x, U, mask)
+    energy -= free_energy0(x, U, mask)
+    return K.expand_dims(-energy, -1)
+
+
+def chain_crf_loss(y, x, U, b_start=None, b_end=None, mask=None):
+    """Variant of sparse_chain_crf_loss but with one-hot encoded tags y."""
+    y_sparse = K.argmax(y, -1)
+    y_sparse = K.cast(y_sparse, 'int32')
+    return sparse_chain_crf_loss(y_sparse, x, U, b_start, b_end, mask)
+
+
+def add_boundary_energy(x, b_start=None, b_end=None, mask=None):
+    """Given the observations x, adds the start boundary energy b_start (resp.
+    the end boundary energy b_end) to the start (resp. end) elements, and
+    multiplies by the mask."""
+    if mask is None:
+        if b_start is not None:
+            x = K.concatenate([x[:, :1, :] + b_start, x[:, 1:, :]], axis=1)  # dim_1 is T
+        if b_end is not None:
+            x = K.concatenate([x[:, :-1, :], x[:, -1:, :] + b_end], axis=1)
+    else:
+        mask = K.cast(mask, K.floatx())
+        mask = K.expand_dims(mask, 2)
+        x *= mask
+        if b_start is not None:
+            mask_r = K.concatenate([K.zeros_like(mask[:, :1]), mask[:, :-1]], axis=1)
+            start_mask = K.cast(K.greater(mask, mask_r), K.floatx())
+            x = x + start_mask * b_start
+        if b_end is not None:
+            mask_l = K.concatenate([mask[:, 1:], K.zeros_like(mask[:, -1:])], axis=1)
+            end_mask = K.cast(K.greater(mask, mask_l), K.floatx())
+            x = x + end_mask * b_end
+    return x
+
+
+def viterbi_decode(x, U, b_start=None, b_end=None, mask=None):
+    """Computes the best tag sequence y for a given input x, i.e. the one that
+    maximizes the value of path_energy."""
+    x = add_boundary_energy(x, b_start, b_end, mask)
+
+    alpha_0 = x[:, 0, :]  # (B, N)
+    gamma_0 = K.zeros_like(alpha_0)
+    initial_states = [gamma_0, alpha_0]
+    # the reduce_step lambda below returns [idx_max, value_max] at each step
+    _, gamma = _forward(x,
+                        lambda B: [K.cast(K.argmax(B, axis=1), K.floatx()), K.max(B, axis=1)],
+                        initial_states,
+                        U,
+                        mask)
+    # gamma: (B, T, N)
+    y = _backward(gamma, mask)
+    return y
+
+
+def free_energy(x, U, b_start=None, b_end=None, mask=None):
+    """Efficiently computes the free energy of input x, i.e. the log-sum-exp of
+    the energies of all possible tag sequences y."""
+    x = add_boundary_energy(x, b_start, b_end, mask)
+    return free_energy0(x, U, mask)
+
+
+def free_energy0(x, U, mask=None):
+    """Free energy without boundary potential handling.
+    x: (B, T, N)
+    U: (N, N)
+    """
+    initial_states = [x[:, 0, :]]  # (B, N)
+    last_alpha, _ = _forward(x,
+                             lambda B: [K.logsumexp(B, axis=1)],
+                             initial_states,
+                             U,
+                             mask)
+    return last_alpha[:, 0]
+
+
+def _forward(x, reduce_step, initial_states, U, mask=None):
+    """Forward recurrence of the linear chain crf."""
+
+    def _forward_step(energy_matrix_t, states):  # (B, N, N), [(N,), (B, N)]
+        alpha_tm1 = states[-1]
+        new_states = reduce_step(K.expand_dims(alpha_tm1, 2) + energy_matrix_t)
+        return new_states[0], new_states
+
+    U_shared = K.expand_dims(K.expand_dims(U, 0), 0)  # (N, N) -> (1, 1, N, N)
+
+    if mask is not None:
+        mask = K.cast(mask, K.floatx())
+        mask_U = K.expand_dims(K.expand_dims(mask[:, :-1] * mask[:, 1:], 2), 3)
+        U_shared = U_shared * mask_U
+
+    inputs = K.expand_dims(x[:, 1:, :], 2) + U_shared  # (B, T-1, 1, N) + (1, 1, N, N) -> (B, T-1, N, N)
+    inputs = K.concatenate([inputs, K.zeros_like(inputs[:, -1:, :, :])], axis=1)  # (B, T-1, N, N) -> (B, T, N, N)
+
+    last, values, _ = K.rnn(_forward_step, inputs, initial_states)
+    return last, values
+
+
+def batch_gather(reference, indices):
+    """
+
+    :param reference: (B, N)
+    :param indices: (B,)
+    :return:
+    """
+    ref_shape = K.shape(reference)
+    batch_size = ref_shape[0]
+    n_classes = ref_shape[1]
+    flat_indices = K.arange(0, batch_size) * n_classes + K.flatten(indices)
+    return K.gather(K.flatten(reference), flat_indices)
+
+
+def _backward(gamma, mask):
+    """Backward recurrence of the linear chain crf."""
+    gamma = K.cast(gamma, 'int32')  # (B, T, N)
+
+    def _backward_step(gamma_t, states):
+        y_tm1 = states[0]
+        y_t = batch_gather(gamma_t, y_tm1)
+        return y_t, [y_t]
+
+    initial_states = [K.zeros_like(gamma[:, 0, 0])]  # (B,)
+    _, y_rev, _ = K.rnn(_backward_step,
+                        gamma,
+                        initial_states,
+                        go_backwards=True)
+    y = K.reverse(y_rev, 1)
+
+    if mask is not None:
+        mask = K.cast(mask, dtype='int32')
+        # mask output
+        y *= mask
+        # set masked values to -1
+        y += -(1 - mask)
+    return y
+
+#--------- ChainCRF() -------------------#
+def CRF_forward(observations, transitions):
+    """
+    Batched CRF forward (alpha) recurrence.
+    :param observations: (B, T, N)
+    :param transitions: (N, N)
+    :return: alpha, (B, T-1, N)
+    """
+    U = transitions.dimshuffle('x', 'x', 0, 1)  # (N, N) -> (1, 1, N, N)
+    initial = observations[:, 0, :]             # (B, N)
+    initial = tensor.unbroadcast(initial, 0, 1)
+    x = observations[:, 1:, :]                  # (B, T-1, N)
+    x = x.dimshuffle(0, 1, 'x', 2)              # (B, T-1, N) -> (B, T-1, 1, N)
+    x = x + U
+    # x = tensor.concatenate([x, tensor.zeros_like(x[:, -1:, :, :])], axis=1)  # (B, T-1, N, N) -> (B, T, N, N)
+    x = x.dimshuffle(1, 0, 2, 3)                # (T-1, B, N, N)
+
+    def recurrence(energy_matrix_t, states):
+        """
+        :param energy_matrix_t: (B, N, N)
+        :return: new_states, (B, N)
+        """
+        alpha_tm1 = states                           # (B, N)
+        alpha_tm1 = alpha_tm1.dimshuffle(0, 1, 'x')  # (B, N, 1)
+        x = alpha_tm1 + energy_matrix_t              # (B, N, N)
+        new_states = log_sum_exp(x, axis=1)          # (B, N)
+        return new_states
+
+    # alpha: (T-1, B, N)
+    alpha, _ = theano.scan(
+        fn=recurrence,
+        sequences=x,
+        outputs_info=[initial]
+    )
+    return alpha.dimshuffle(1, 0, 2)  # (B, T-1, N)
+
+def CRF_decode(observations, transitions):
+    """
+    Batched Viterbi decoding.
+    :param observations: (B, T, N)
+    :param transitions: (N, N)
+    :return: best tag sequence, (B, T)
+    """
+    alpha_0 = observations[:, 0, :]  # (B, N)
+    gamma_0 = tensor.zeros_like(alpha_0, dtype='int64')
+
+    U = transitions.dimshuffle('x', 'x', 0, 1)  # (N, N) -> (1, 1, N, N)
+    x = observations[:, 1:, :]                  # (B, T-1, N)
+    x = x.dimshuffle(0, 1, 'x', 2)              # (B, T-1, N) -> (B, T-1, 1, N)
+    x = x + U                                   # (B, T-1, N, N)
+    x = tensor.concatenate([x, tensor.zeros_like(x[:, -1:, :, :])], axis=1)  # (B, T-1, N, N) -> (B, T, N, N)
+    x = x.dimshuffle(1, 0, 2, 3)                # (T, B, N, N)
+
+    def recurrence(energy_matrix_t, index_tm1, score_tm1):
+        """
+        :param energy_matrix_t: (B, N, N)
+        :return: (index, score), each (B, N)
+        """
+        score_tm1 = score_tm1.dimshuffle(0, 1, 'x')  # (B, N, 1)
+        x = score_tm1 + energy_matrix_t              # (B, N, N)
+        index = tensor.argmax(x, axis=1)             # (B, N)
+        score = tensor.max(x, axis=1)                # (B, N)
+        return index, score
+
+    # gamma, alpha: (T, B, N)
+    result, _ = theano.scan(
+        fn=recurrence,
+        sequences=x,
+        outputs_info=[gamma_0, alpha_0]
+    )
+    gamma, alpha = result
+
+    def backward_step(gamma_t, y_tm1):
+        y_t = batch_gather(gamma_t, y_tm1)
+        return y_t
+
+    T, B, N = gamma.shape
+    initial = tensor.zeros(shape=(B,), dtype='int64')
+    # y: (T, B)
+    y, _ = theano.scan(
+        fn=backward_step,
+        sequences=gamma,
+        outputs_info=[initial],
+        go_backwards=True
+    )
+    y = y.dimshuffle(1, 0)
+    y = y[:, ::-1]
+    y = tensor.cast(y, 'int32')
+    return y  # (B, T)
+
+#--------- ChainCRF_Lample() ------------#
+def CRF_forward_nobatch(observations, transitions, viterbi=False, return_alpha=False, return_best_sequence=False):
+    """
+    Takes as input:
+        [DV] (T+2, N+2), (N+2, N+2)
+        - observations, sequence of shape (n_steps, n_classes)
+        - transitions, sequence of shape (n_classes, n_classes)
+    Probabilities must be given in the log space.
+ Compute alpha, matrix of size (n_steps, n_classes), such that + alpha[i, j] represents one of these 2 values: + - the probability that the real path at node i ends in j + - the maximum probability of a path finishing in j at node i (Viterbi) + Returns one of these 2 values: + - alpha + - the final probability, which can be: + - the sum of the probabilities of all paths + - the probability of the best path (Viterbi) + """ + assert not return_best_sequence or (viterbi and not return_alpha) + + def recurrence(obs, previous, transitions): + """ + + :param obs: (N+2,) + :param previous: (N+2,) + :param transitions: (N+2, N+2) + :return: + """ + previous = previous.dimshuffle(0, 'x') # (N+2,) -> (N+2, 1) + obs = obs.dimshuffle('x', 0) # (N+2,) -> (1, N+2) + if viterbi: + scores = previous + obs + transitions + out = scores.max(axis=0) + if return_best_sequence: + out2 = scores.argmax(axis=0) + return out, out2 + else: + return out + else: + return log_sum_exp(previous + obs + transitions, axis=0) + + initial = observations[0] # = b_s = [[small] * T, 0, small] + alpha, _ = theano.scan( + fn=recurrence, + outputs_info=(initial, None) if return_best_sequence else initial, + sequences=[observations[1:]], + non_sequences=transitions + ) + + if return_alpha: + return alpha + elif return_best_sequence: + sequence, _ = theano.scan( + fn=lambda beta_i, previous: beta_i[previous], + outputs_info=tensor.cast(tensor.argmax(alpha[0][-1]), 'int32'), + sequences=tensor.cast(alpha[1][::-1], 'int32') + ) + sequence = tensor.concatenate([sequence[::-1], [tensor.argmax(alpha[0][-1])]]) + return sequence + else: + if viterbi: + return alpha[-1].max(axis=0) + else: + return log_sum_exp(alpha[-1], + axis=0) # p(x). Here alpha is equivalent to beta in "CRF as NN Layer", Page10. 
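+
+# --- illustrative helper (added for clarity; not exercised by the test below) ---
+# np_crf_forward() is a plain-numpy, single-sequence re-implementation of the alpha
+# (log-sum-exp) recurrence used by CRF_forward()/free_energy0() above, handy for
+# sanity-checking the Theano functions by hand. The helper name is ours, not part
+# of dandelion or any library API.
+def np_crf_forward(obs, trans):
+    """obs: (T, N) emission scores, trans: (N, N) transition scores, in log space.
+    Returns log(Z), the free energy of the sequence."""
+    alpha = obs[0]                                          # (N,)
+    for t in range(1, obs.shape[0]):
+        scores = alpha[:, None] + trans + obs[t][None, :]   # scores[i, j] = alpha[i] + U[i, j] + obs[t, j]
+        m = scores.max(axis=0)
+        alpha = m + np.log(np.exp(scores - m).sum(axis=0))  # stable log-sum-exp over previous tag i
+    m = alpha.max()
+    return m + np.log(np.exp(alpha - m).sum())              # log(Z), a scalar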
+ + + +if __name__ == '__main__': + + if 0: + y = tensor.imatrix('y') + x = tensor.ftensor3('x') + U = tensor.fmatrix('U') + x_nobatch = tensor.fmatrix('x_nobatch') + + cost1 = free_energy0(x, U) + cost2 = CRF_forward(x, U) + cost3 = CRF_forward_nobatch(x_nobatch, U) + seq1 = viterbi_decode(x, U) + seq2 = CRF_decode(x, U) + print('compiling f1 & f2 ...') + f1 = theano.function([x,U], cost1) + f2 = theano.function([x,U], cost2) + print('compiling f3 ...') + f3 = theano.function([x,U], seq1) + print('compiling f4 ...') + f4 = theano.function([x,U], seq2) + print('compiling f5 ...') + f5 = theano.function([x_nobatch, U], cost3) + + B, T, N = 7, 100, 20 + for i in range(1): + x = np.random.rand(B, T, N).astype(np.float32) + U = np.random.rand(N, N).astype(np.float32) + + c1 = f1(x,U) + c2 = f2(x,U) + s1 = f3(x, U) + s2 = f4(x, U) + print(c1) + print(c2) + print(c1==c2) + if np.all(c1==c2): + print('c pass') + else: + raise ValueError('c not same!') + + print(s1) + print(s2) + print(s1.shape) + print(s2.shape) + print(s1==s2) + if np.all(s1==s2): + print('s pass') + else: + raise ValueError('s not same!') + + if 1: + y = tensor.imatrix('y') + x = tensor.ftensor3('x') + U = tensor.fmatrix('U') + x_nobatch = tensor.fmatrix('x_nobatch') + + cost2 = CRF_forward(x, U) + cost3 = CRF_forward_nobatch(x_nobatch, U, return_alpha=True) + seq2 = CRF_decode(x, U) + seq3 = CRF_forward_nobatch(x_nobatch, U, viterbi=True, return_best_sequence=True) + print('compiling f2 ...') + f2 = theano.function([x, U], cost2) + print('compiling f4 ...') + f4 = theano.function([x, U], seq2) + print('compiling f5 ...') + f5 = theano.function([x_nobatch, U], cost3) + print('compiling f6 ...') + f6 = theano.function([x_nobatch, U], seq3) + + B, T, N = 2, 5, 3 + for i in range(10): + x = np.random.rand(B, T, N).astype(np.float32) + U = np.random.rand(N, N).astype(np.float32) + + c2 = f2(x, U) + s2 = f4(x, U) + + c3_list = [] + s3_list = [] + for j in range(B): + c3_nobatch = f5(x[j,:,:], U) + s3_nobatch = f6(x[j, :, :], U) + c3_list.append(np.expand_dims(c3_nobatch, 0)) + s3_list.append(np.expand_dims(s3_nobatch, 0)) + c3 = np.concatenate(c3_list, axis=0) + s3 = np.concatenate(s3_list, axis=0) + + # if np.all(c2 == c3): + # print('c pass') + # else: + print(c2) + print(c3) + # raise ValueError('c not same!') + + print(s2) + print(s3) + if np.all(s2 == s3): + print('s pass') + else: + + raise ValueError('s not same!') + + + + diff --git a/Test/Test_Conv2D.py b/Test/Test_Conv2D.py new file mode 100644 index 0000000..b6b9212 --- /dev/null +++ b/Test/Test_Conv2D.py @@ -0,0 +1,87 @@ +# coding:utf-8 +# Test for Conv2D class +# Created : 1, 31, 2018 +# Revised : 1, 31, 2018 +# All rights reserved +#------------------------------------------------------------------------------------------------ +__author__ = 'dawei.leng' +import os, sys, psutil +os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'" + +import theano +from theano import tensor +from dandelion.module import * +from dandelion.activation import * +from lasagne.layers import InputLayer, Conv2DLayer, get_output +import lasagne.nonlinearities as LACT +import dandelion +dandelion_path = os.path.split(dandelion.__file__)[0] +print('dandelion path = %s\n' % dandelion_path) + +class build_model_D(Module): + def __init__(self, in_channel=3, out_channel=3, kernel_size=(3,3), stride=(1,1), pad='valid', dilation=(1,1), num_groups=1): + super().__init__() + self.conv2d = Conv2D(in_channels=in_channel, out_channels=out_channel, kernel_size=kernel_size, 
stride=stride, + pad=pad, dilation=dilation, num_groups=num_groups) + self.predict = self.forward + + def forward(self, x): + """ + + :param x: (B, C, H, W) + :return: + """ + x = self.conv2d.forward(x) + x = relu(x) + return x + +def build_model_L(in_channel=3, out_channel=3, kernel_size=(3,3), stride=(1,1), pad='valid', dilation=(1,1), num_groups=1): + input_var = tensor.ftensor4('x') # (B, C, H, W) + input0 = InputLayer(shape=(None, in_channel, None, None), input_var=input_var, name='input0') + conv0 = Conv2DLayer(input0, num_filters=out_channel, filter_size=kernel_size, stride=stride, pad=pad, nonlinearity=LACT.rectify, + name='conv0') + return conv0 + + +if __name__ == '__main__': + import numpy as np + from lasagne_ext.utils import get_layer_by_name + + in_channel = 3; out_channel = 3;kernel_size = (3, 3); stride = (1, 1); pad = 'valid';dilation = (1,1);num_groups = 1 + model_D = build_model_D(in_channel=in_channel, out_channel=out_channel, kernel_size=kernel_size, stride=stride, + pad=pad, dilation=dilation, num_groups=num_groups) + model_L = build_model_L(in_channel=in_channel, out_channel=out_channel, kernel_size=kernel_size, stride=stride, + pad=pad) + + W = np.random.rand(out_channel, in_channel, kernel_size[0], kernel_size[1]).astype(np.float32) + b = np.random.rand(out_channel).astype(np.float32) + + model_D.conv2d.W.set_value(W) + model_D.conv2d.b.set_value(b) + + conv_L = get_layer_by_name(model_L, 'conv0') + conv_L.W.set_value(W) + conv_L.b.set_value(b) + + X = get_layer_by_name(model_L, 'input0').input_var + y_D = model_D.forward(X) + y_L = get_output(model_L) + + fn_D = theano.function([X], y_D, no_default_updates=True, on_unused_input='ignore') + fn_L = theano.function([X], y_L, no_default_updates=True, on_unused_input='ignore') + + for i in range(20): + x = np.random.rand(3, in_channel, 32, 32).astype(np.float32) - 0.5 + y_D = fn_D(x) + y_L = fn_L(x) + diff = np.max(np.abs(y_D - y_L)) + print('i=%d, diff=%0.6f' % (i, diff)) + if diff>1e-4: + print('y_D=\n', y_D) + print('y_L=\n', y_L) + raise ValueError('diff is too big') + + print('Test passed') + + + diff --git a/Test/Test_ConvTransposed2D.py b/Test/Test_ConvTransposed2D.py new file mode 100644 index 0000000..4faa0cc --- /dev/null +++ b/Test/Test_ConvTransposed2D.py @@ -0,0 +1,87 @@ +# coding:utf-8 +# Test for ConvTransposed2D class +# Created : 3, 2, 2018 +# Revised : 3, 2, 2018 +# All rights reserved +#------------------------------------------------------------------------------------------------ +__author__ = 'dawei.leng' +import os, sys, psutil +os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'" + +import theano +from theano import tensor +from dandelion.module import * +from dandelion.activation import * +from lasagne.layers import InputLayer, TransposedConv2DLayer, get_output +import lasagne.nonlinearities as LACT +import dandelion +dandelion_path = os.path.split(dandelion.__file__)[0] +print('dandelion path = %s\n' % dandelion_path) + +class build_model_D(Module): + def __init__(self, in_channel=3, out_channel=3, kernel_size=(3,3), stride=(1,1), pad='valid', dilation=(1,1), num_groups=1): + super().__init__() + self.tconv2d = ConvTransposed2D(in_channels=in_channel, out_channels=out_channel, kernel_size=kernel_size, stride=stride, + pad=pad, dilation=dilation, num_groups=num_groups) + self.predict = self.forward + + def forward(self, x): + """ + + :param x: (B, C, H, W) + :return: + """ + x = self.tconv2d.forward(x) + # x = relu(x) + return x + +def build_model_L(in_channel=3, 
out_channel=3, kernel_size=(3,3), stride=(1,1), pad='valid', dilation=(1,1), num_groups=1): + input_var = tensor.ftensor4('x') # (B, C, H, W) + input0 = InputLayer(shape=(None, in_channel, None, None), input_var=input_var, name='input0') + tconv0 = TransposedConv2DLayer(input0, num_filters=out_channel, filter_size=kernel_size, stride=stride, crop=pad, nonlinearity=LACT.linear, + name='tconv0') + return tconv0 + + +if __name__ == '__main__': + import numpy as np + from lasagne_ext.utils import get_layer_by_name + + in_channel = 3; out_channel = 3;kernel_size = (3, 3); stride = (1, 1); pad = 'valid';dilation = (1,1);num_groups = 1 + model_D = build_model_D(in_channel=in_channel, out_channel=out_channel, kernel_size=kernel_size, stride=stride, + pad=pad, dilation=dilation, num_groups=num_groups) + model_L = build_model_L(in_channel=in_channel, out_channel=out_channel, kernel_size=kernel_size, stride=stride, + pad=pad) + + W = np.random.rand(out_channel, in_channel, kernel_size[0], kernel_size[1]).astype(np.float32) + b = np.random.rand(out_channel).astype(np.float32) + + model_D.tconv2d.W.set_value(W) + model_D.tconv2d.b.set_value(b) + + conv_L = get_layer_by_name(model_L, 'tconv0') + conv_L.W.set_value(W) + conv_L.b.set_value(b) + + X = get_layer_by_name(model_L, 'input0').input_var + y_D = model_D.forward(X) + y_L = get_output(model_L) + + fn_D = theano.function([X], y_D, no_default_updates=True, on_unused_input='ignore') + fn_L = theano.function([X], y_L, no_default_updates=True, on_unused_input='ignore') + + for i in range(20): + x = np.random.rand(8, in_channel, 33, 32).astype(np.float32) - 0.5 + y_D = fn_D(x) + y_L = fn_L(x) + diff = np.max(np.abs(y_D - y_L)) + print('i=%d, diff=%0.6f' % (i, diff)) + if diff>1e-4: + print('y_D=\n', y_D) + print('y_L=\n', y_L) + raise ValueError('diff is too big') + + print('Test passed') + + + diff --git a/Test/Test_Dense.py b/Test/Test_Dense.py new file mode 100644 index 0000000..075ce39 --- /dev/null +++ b/Test/Test_Dense.py @@ -0,0 +1,72 @@ +# coding:utf-8 +# Unit test for Dense class +# Created : 1, 30, 2018 +# Revised : 1, 30, 2018 +# All rights reserved +#------------------------------------------------------------------------------------------------ +__author__ = 'dawei.leng' +import os, sys, psutil +os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'" + +import theano +from theano import tensor +from dandelion.module import * +from dandelion.activation import * +from lasagne.layers import InputLayer, DenseLayer, get_output +import lasagne.nonlinearities as LACT + +class build_model_D(Module): + def __init__(self, in_dim=3, out_dim=3): + super().__init__() + self.in_dim = in_dim + self.out_dim = out_dim + self.dense = Dense(input_dims=self.in_dim, output_dim=self.out_dim) + self.predict = self.forward + + def forward(self, x): + x = self.dense.forward(x) + x = softmax(x) + return x + +def build_model_L(in_dim=3, out_dim=3): + input_var = tensor.fmatrix('x') + input0 = InputLayer(shape=(None, in_dim), input_var=input_var, name='input0') + dense0 = DenseLayer(input0, num_units=out_dim, nonlinearity=LACT.softmax, name='dense0') + return dense0 + + +if __name__ == '__main__': + import numpy as np + from lasagne_ext.utils import get_layer_by_name + + in_dim, out_dim = 500, 16 + model_D = build_model_D(in_dim=in_dim, out_dim=out_dim) + model_L = build_model_L(in_dim=in_dim, out_dim=out_dim) + + W = np.random.rand(in_dim, out_dim).astype(np.float32) + b = np.random.rand(out_dim).astype(np.float32) + 
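# Both implementations get one shared set of random parameters below, so their
+    # outputs must agree: each computes y = softmax(x.dot(W) + b) for the same (W, b).
+    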
model_D.dense.W.set_value(W) + model_D.dense.b.set_value(b) + get_layer_by_name(model_L, 'dense0').W.set_value(W) + get_layer_by_name(model_L, 'dense0').b.set_value(b) + + X = get_layer_by_name(model_L, 'input0').input_var + y_D = model_D.forward(X) + y_L = get_output(model_L) + + fn_D = theano.function([X], y_D, no_default_updates=True) + fn_L = theano.function([X], y_L, no_default_updates=True) + + for i in range(20): + x = np.random.rand(16, in_dim).astype(np.float32) + y_D = fn_D(x) + y_L = fn_L(x) + diff = np.sum(np.abs(y_D - y_L)) + print('i=%d, diff=%0.6f' % (i, diff)) + if diff>1e-4: + raise ValueError('diff is too big') + + print('Test passed') + + + diff --git a/Test/Test_GRU.py b/Test/Test_GRU.py new file mode 100644 index 0000000..64cc5c9 --- /dev/null +++ b/Test/Test_GRU.py @@ -0,0 +1,102 @@ +# coding:utf-8 +# Unit test for GRU class +# Created : 1, 31, 2018 +# Revised : 1, 31, 2018 +# All rights reserved +#------------------------------------------------------------------------------------------------ +__author__ = 'dawei.leng' +import os, sys, psutil +os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'" + +import theano +from theano import tensor +from dandelion.module import * +from dandelion.activation import * +from lasagne.layers import InputLayer, GRULayer, get_output +import lasagne.nonlinearities as LACT + +class build_model_D(Module): + def __init__(self, in_dim=3, out_dim=3): + super().__init__() + self.in_dim = in_dim + self.out_dim = out_dim + self.gru = GRU(input_dims=self.in_dim, hidden_dim=self.out_dim) + self.predict = self.forward + + def forward(self, x): + """ + + :param x: (B, T, D) + :return: + """ + x = x.dimshuffle((1, 0, 2)) # ->(T, B, D) + x = self.gru.forward(x, backward=False, only_return_final=False) + x = x.dimshuffle((1, 0, 2)) # ->(B, T, D) + # x = tanh(x) + return x + +def build_model_L(in_dim=3, out_dim=3): + input_var = tensor.ftensor3('x') # (B, T, D) + input0 = InputLayer(shape=(None, None, in_dim), input_var=input_var, name='input0') + gru0 = GRULayer(input0, num_units=out_dim, precompute_input=True, + backwards=False, only_return_final=False, learn_init=True, + name='gru0') + return gru0 + + +if __name__ == '__main__': + import numpy as np + from lasagne_ext.utils import get_layer_by_name + + in_dim, out_dim = 6, 5 + model_D = build_model_D(in_dim=in_dim, out_dim=out_dim) + model_L = build_model_L(in_dim=in_dim, out_dim=out_dim) + + W_in = np.random.rand(in_dim, 3*out_dim).astype(np.float32) + b_in = np.random.rand(3*out_dim).astype(np.float32) + W_hid = np.random.rand(out_dim, 3*out_dim).astype(np.float32) + h_ini = np.random.rand(out_dim).astype(np.float32) + + model_D.gru.W_in.set_value(W_in) + model_D.gru.b_in.set_value(b_in) + model_D.gru.W_hid.set_value(W_hid) + model_D.gru.h_ini.set_value(h_ini) + + gru_L = get_layer_by_name(model_L, 'gru0') + gru_L.W_in_to_resetgate.set_value(W_in[:, :out_dim]) + gru_L.W_in_to_updategate.set_value(W_in[:, out_dim:2*out_dim]) + gru_L.W_in_to_hidden_update.set_value(W_in[:, 2*out_dim:3*out_dim]) + + gru_L.W_hid_to_resetgate.set_value(W_hid[:, :out_dim]) + gru_L.W_hid_to_updategate.set_value(W_hid[:, out_dim:2*out_dim]) + gru_L.W_hid_to_hidden_update.set_value(W_hid[:, 2*out_dim:3*out_dim]) + + gru_L.b_resetgate.set_value(b_in[:out_dim]) + gru_L.b_updategate.set_value(b_in[out_dim:2*out_dim]) + gru_L.b_hidden_update.set_value(b_in[2*out_dim:3*out_dim]) + + gru_L.hid_init.set_value(h_ini.reshape((1, out_dim))) + + + X = get_layer_by_name(model_L, 'input0').input_var + y_D 
= model_D.forward(X) + y_L = get_output(model_L) + + fn_D = theano.function([X], y_D, no_default_updates=True, on_unused_input='ignore') + fn_L = theano.function([X], y_L, no_default_updates=True, on_unused_input='ignore') + + for i in range(20): + x = np.random.rand(2, 5, in_dim).astype(np.float32) - 0.5 + y_D = fn_D(x) + y_L = fn_L(x) + diff = np.max(np.abs(y_D - y_L)) + print('i=%d, diff=%0.6f' % (i, diff)) + if diff>1e-4: + print('y_D=\n', y_D) + print('y_L=\n', y_L) + raise ValueError('diff is too big') + + print('Test passed') + + + diff --git a/Test/Test_LSTM.py b/Test/Test_LSTM.py new file mode 100644 index 0000000..316c196 --- /dev/null +++ b/Test/Test_LSTM.py @@ -0,0 +1,117 @@ +# coding:utf-8 +# Unit test for LSTM class +# Created : 1, 30, 2018 +# Revised : 1, 30, 2018 +# All rights reserved +#------------------------------------------------------------------------------------------------ +__author__ = 'dawei.leng' +import os, sys, psutil +os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'" + +import theano +from theano import tensor +from dandelion.module import * +from dandelion.activation import * +from lasagne.layers import InputLayer, DenseLayer, LSTMLayer, get_output, Upscale2DLayer, TransposedConv2DLayer +import lasagne.nonlinearities as LACT + +class build_model_D(Module): + def __init__(self, in_dim=3, out_dim=3): + super().__init__() + self.in_dim = in_dim + self.out_dim = out_dim + self.lstm = LSTM(input_dims=self.in_dim, hidden_dim=self.out_dim, peephole=True) + self.predict = self.forward + + def forward(self, x): + """ + + :param x: (B, T, D) + :return: + """ + x = x.dimshuffle((1, 0, 2)) # ->(T, B, D) + x = self.lstm.forward(x, backward=True, only_return_final=True) + # x = x.dimshuffle((1, 0, 2)) # ->(B, T, D) + # x = tanh(x) + return x + +def build_model_L(in_dim=3, out_dim=3): + input_var = tensor.ftensor3('x') # (B, T, D) + input0 = InputLayer(shape=(None, None, in_dim), input_var=input_var, name='input0') + lstm0 = LSTMLayer(input0, num_units=out_dim, precompute_input=True, nonlinearity=LACT.tanh, + backwards=True, only_return_final=True, learn_init=True, consume_less='None', + name='lstm0') + return lstm0 + + +if __name__ == '__main__': + import numpy as np + from lasagne_ext.utils import get_layer_by_name + + in_dim, out_dim = 32, 3 + model_D = build_model_D(in_dim=in_dim, out_dim=out_dim) + model_L = build_model_L(in_dim=in_dim, out_dim=out_dim) + + W_in = np.random.rand(in_dim, 4*out_dim).astype(np.float32) + b_in = np.random.rand(4*out_dim).astype(np.float32) + W_hid = np.random.rand(out_dim, 4*out_dim).astype(np.float32) + h_ini = np.random.rand(out_dim).astype(np.float32) + c_ini = np.random.rand(out_dim).astype(np.float32) + w_cell_to_igate = np.random.rand(out_dim).astype(np.float32) + w_cell_to_fgate = np.random.rand(out_dim).astype(np.float32) + w_cell_to_ogate = np.random.rand(out_dim).astype(np.float32) + + model_D.lstm.W_in.set_value(W_in) + model_D.lstm.b_in.set_value(b_in) + model_D.lstm.W_hid.set_value(W_hid) + model_D.lstm.h_ini.set_value(h_ini) + model_D.lstm.c_ini.set_value(c_ini) + model_D.lstm.w_cell_to_igate.set_value(w_cell_to_igate) + model_D.lstm.w_cell_to_fgate.set_value(w_cell_to_fgate) + model_D.lstm.w_cell_to_ogate.set_value(w_cell_to_ogate) + + lstm_L = get_layer_by_name(model_L, 'lstm0') + lstm_L.W_in_to_ingate.set_value(W_in[:, :out_dim]) + lstm_L.W_in_to_forgetgate.set_value(W_in[:, out_dim:2*out_dim]) + lstm_L.W_in_to_cell.set_value(W_in[:, 2*out_dim:3*out_dim]) + 
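# note: the slices here assume dandelion packs the LSTM gate parameters along
+    # axis 1 in the order [ingate | forgetgate | cell | outgate]; the W_hid and
+    # b_in slices below assume the same packed layout.
+    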
lstm_L.W_in_to_outgate.set_value(W_in[:, 3*out_dim:])
+
+    lstm_L.W_hid_to_ingate.set_value(W_hid[:, :out_dim])
+    lstm_L.W_hid_to_forgetgate.set_value(W_hid[:, out_dim:2*out_dim])
+    lstm_L.W_hid_to_cell.set_value(W_hid[:, 2*out_dim:3*out_dim])
+    lstm_L.W_hid_to_outgate.set_value(W_hid[:, 3*out_dim:])
+
+    lstm_L.b_ingate.set_value(b_in[:out_dim])
+    lstm_L.b_forgetgate.set_value(b_in[out_dim:2*out_dim])
+    lstm_L.b_cell.set_value(b_in[2*out_dim:3*out_dim])
+    lstm_L.b_outgate.set_value(b_in[3*out_dim:])
+
+    lstm_L.hid_init.set_value(h_ini.reshape((1, out_dim)))
+    lstm_L.cell_init.set_value(c_ini.reshape((1, out_dim)))
+
+    lstm_L.W_cell_to_ingate.set_value(w_cell_to_igate)
+    lstm_L.W_cell_to_forgetgate.set_value(w_cell_to_fgate)
+    lstm_L.W_cell_to_outgate.set_value(w_cell_to_ogate)
+
+    X = get_layer_by_name(model_L, 'input0').input_var
+    y_D = model_D.forward(X)
+    y_L = get_output(model_L)
+
+    fn_D = theano.function([X], y_D, no_default_updates=True, on_unused_input='ignore')
+    fn_L = theano.function([X], y_L, no_default_updates=True, on_unused_input='ignore')
+
+    for i in range(20):
+        x = np.random.rand(4, 16, in_dim).astype(np.float32)
+        y_D = fn_D(x)
+        y_L = fn_L(x)
+        diff = np.max(np.abs(y_D - y_L))
+        print('i=%d, diff=%0.6f' % (i, diff))
+        if diff>1e-4:
+            print('y_D=\n', y_D)
+            print('y_L=\n', y_L)
+            raise ValueError('diff is too big')
+
+    print('Test passed')
+
+
+
diff --git a/Test/Test_Unet.py b/Test/Test_Unet.py
new file mode 100644
index 0000000..4e63011
--- /dev/null
+++ b/Test/Test_Unet.py
@@ -0,0 +1,37 @@
+# coding:utf-8
+# Partial test for U-net.
+# Created : 5, 25, 2018
+# Revised : 5, 25, 2018
+# All rights reserved
+#------------------------------------------------------------------------------------------------
+__author__ = 'dawei.leng'
+import os, sys, psutil
+import numpy as np
+os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'"
+
+import theano
+from theano import tensor
+from dandelion.module import *
+from dandelion.activation import *
+from dandelion.model.unet import model_Unet
+
+import dandelion
+dandelion_path = os.path.split(dandelion.__file__)[0]
+print('dandelion path = %s\n' % dandelion_path)
+
+if __name__ == '__main__':
+    im_height, im_width = 65, 63
+    model = model_Unet(im_height=im_height, im_width=im_width)
+    x = tensor.ftensor4('x')
+    y = model.forward(x)
+    print('compiling fn...')
+    fn = theano.function([x], y, no_default_updates=False)
+    print('run fn...')
+    input = np.random.rand(7, 1, im_height, im_width).astype(np.float32)
+    output = fn(input)
+    print(output)
+    print(output.shape)
+
+    print('Test passed')
+
+
+
diff --git a/Test/Test_pooling.py b/Test/Test_pooling.py
new file mode 100644
index 0000000..7ceee77
--- /dev/null
+++ b/Test/Test_pooling.py
@@ -0,0 +1,63 @@
+# coding:utf-8
+# Unit test for pooling functions
+# Created : 2, 27, 2018
+# Revised : 2, 27, 2018
+# All rights reserved
+#------------------------------------------------------------------------------------------------
+__author__ = 'dawei.leng'
+import os, sys, psutil
+os.environ['THEANO_FLAGS'] = "floatX=float32, mode=FAST_RUN, warn_float64='raise'"
+
+import theano
+from theano import tensor
+from dandelion.module import *
+from dandelion.functional import *
+
+def pool_1d_Lasagne(x, axis=1, mode='max'):
+    """
+    Lasagne requires x to be 3D; pooling is done on the last dimension.
+    :param x:
+    :param axis:
+    :return:
+    """
+    input_4d = tensor.shape_padright(x, 1)
+    if axis == 1:
+        input_4d = input_4d.dimshuffle((0, 2, 1, 3))
+    pooled = pool_2d(input_4d,
+                     ws=(2, 1),
+                     
stride=(2, 1), + ignore_border=True, + pad=(0, 0), + mode=mode, + ) + if axis == 1: # [DV] add support for 'axis' para + pooled = pooled.dimshuffle((0, 2, 1, 3)) + return pooled[:, :, :, 0] + +if __name__ == '__main__': + import numpy as np + + x_3d = tensor.ftensor3('x') + y_3d_D = pool_1d(x_3d, axis=1) + + y_3d_L = pool_1d_Lasagne(x_3d, axis=1) + + fn_D = theano.function([x_3d], y_3d_D, no_default_updates=True, on_unused_input='ignore') + fn_L = theano.function([x_3d], y_3d_L, no_default_updates=True, on_unused_input='ignore') + + + for i in range(20): + x = np.random.rand(7, 117, 27).astype(np.float32) + y_D = fn_D(x) + y_L = fn_L(x) + diff = np.max(np.abs(y_D - y_L)) + print('i=%d, diff=%0.6f' % (i, diff)) + if diff>1e-4: + print('y_D=\n', y_D) + print('y_L=\n', y_L) + raise ValueError('diff is too big') + + print('Test passed') + + + diff --git a/Test/__init__.py b/Test/__init__.py new file mode 100644 index 0000000..a262ecb --- /dev/null +++ b/Test/__init__.py @@ -0,0 +1 @@ +from .. import dandelion diff --git a/Test/todo.txt b/Test/todo.txt new file mode 100644 index 0000000..d9fb193 --- /dev/null +++ b/Test/todo.txt @@ -0,0 +1 @@ +Build unit test interface for each Test_*.py \ No newline at end of file diff --git a/dandelion/__init__.py b/dandelion/__init__.py index c39b31c..42d3718 100644 --- a/dandelion/__init__.py +++ b/dandelion/__init__.py @@ -7,5 +7,5 @@ from . import functional from . import model -__version__ = "0.15.1" +__version__ = "0.15.2" __author__ = "David Leon (Dawei Leng)" diff --git a/dandelion/module.py b/dandelion/module.py index 07c3cc2..eb5ea17 100644 --- a/dandelion/module.py +++ b/dandelion/module.py @@ -978,6 +978,9 @@ def __init__(self, in_channels, out_channels, kernel_size=(3,3), stride=(1,1), p else: self.b = self.register_param(b, shape=[out_channels], name='b_TConv2D') + self.predict = self.forward # predict() is the same with forward() for this layer + + def forward(self, input): if self.pad[0] == 'same': border_mode = 'half' elif self.pad[0] == 'valid': @@ -986,20 +989,17 @@ def __init__(self, in_channels, out_channels, kernel_size=(3,3), stride=(1,1), p border_mode = 'full' else: border_mode = self.pad - self.convTOP = tensor.nnet.abstract_conv.AbstractConv2d_gradInputs(imshp=[None, self.out_channels, self.output_shape[0], self.output_shape[1]], + convTOP = tensor.nnet.abstract_conv.AbstractConv2d_gradInputs(imshp=[None, self.out_channels, self.output_shape[0], self.output_shape[1]], kshp=self.W_shape, border_mode=border_mode, subsample=self.stride, filter_flip=not self.flip_filters, filter_dilation=self.dilation, num_groups=self.num_groups) - self.predict = self.forward # predict() is the same with forward() for this layer - - def forward(self, input): + output_shape = self.output_shape if any(s is None for s in self.output_shape): B, C, H, W = input.shape - self.output_shape = tuple(conv_input_length(input, filter, stride, p) + output_shape = tuple(conv_input_length(input, filter, stride, p) for input, filter, stride, p in zip([H, W], self.kernel_size, self.stride, self.pad)) - - conved = self.convTOP(self.W, input, self.output_shape) + conved = convTOP(self.W, input, output_shape) if self.b is None: output = conved elif self.untie_bias: diff --git a/docs/history.md b/docs/history.md index ec01d69..d26cc88 100644 --- a/docs/history.md +++ b/docs/history.md @@ -1,5 +1,9 @@ # History +## version 0.15.2 [5-28-2018] +* **FIXED**: `convTOP` should be constructed each time the `forward()` function of `ConvTransposed2D` is called. 
+  (Reason: `forward()` now derives `output_shape` from the symbolic input when the static shape is unknown, so a `convTOP` op cached at construction time could otherwise reuse a stale shape.)
+
+
 ## version 0.15.1 [5-25-2018]
 * **NEW**: add `model` module into master branch of Dandelion
 * **NEW**: add U-net FCN implementation into `model` module