first public release
david-leon committed Apr 19, 2018
0 parents commit 8bffccb
Showing 31 changed files with 6,124 additions and 0 deletions.
7 changes: 7 additions & 0 deletions .gitignore
@@ -0,0 +1,7 @@
# Add any directories, files, or patterns you don't want to be tracked by version control
.idea
build
dist
Dandelion.egg-info
dandelion/__pycache__
Cython/*.whl
2 changes: 2 additions & 0 deletions CHANGES.md
@@ -0,0 +1,2 @@
Changelog
---------
419 changes: 419 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions MANIFEST.in
@@ -0,0 +1 @@
recursive-include dandelion LICENSE
16 changes: 16 additions & 0 deletions README.md
@@ -0,0 +1,16 @@
# Dandelion
A lightweight deep learning framework built on top of Theano, offering a better balance between flexibility and abstraction.

* Aims to offer a better balance between flexibility and abstraction.
* Easy to use and extend, with support for any neural network structure.
* Loosely coupled: each part of the framework can be modified independently.
* More like a handy library of deep learning modules.
  Common modules such as CNN, LSTM, GRU, and Dense, and common optimization methods such as SGD, Adam, Adadelta, and Rmsprop, are ready out-of-the-box.
* Plug & play: operates directly on Theano tensors, with no extra layer of abstraction.
  Unlike frameworks such as Keras and Lasagne, Dandelion works on raw tensors instead of layer abstractions, making it easy to plug in third-party modules (e.g. layers defined in Keras or Lasagne) or vice versa, as sketched below.
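
As a minimal sketch (assuming Theano is installed; the shapes, variable names, and shared variables below are illustrative, not part of the library API), the activation functions shipped in `dandelion/activation.py` can be applied to plain Theano tensors with no `Layer` objects involved:

```python
import numpy as np
import theano
import theano.tensor as T
from dandelion.activation import relu, softmax

# Plain Theano tensors and shared variables -- no Layer wrapper in sight.
x = T.matrix('x')                                   # (batch, n_in)
W = theano.shared(np.random.randn(100, 10).astype(theano.config.floatX), name='W')
b = theano.shared(np.zeros(10, dtype=theano.config.floatX), name='b')

y = softmax(relu(T.dot(x, W) + b))                  # logits -> probabilities
predict = theano.function([x], y, allow_input_downcast=True)
```

Because the result is an ordinary symbolic graph, it can be mixed with tensors produced by Keras or Lasagne layers.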

## Why Another DL Framework
* The main reason is the lack of flexibility in existing DL frameworks such as Keras, Lasagne, and Blocks.
* By “flexibility”, we mean how easy it is to modify or extend the framework.
* The well-known Keras framework is designed to be beginner-friendly, at the cost of being quite hard to modify.
* Compared to Keras, the less famous Lasagne framework provides more flexibility. It is easier to write your own layer with Lasagne for a small neural network; however, complex neural networks still require considerable manual work because, like other existing frameworks, Lasagne operates on an abstract ‘Layer’ class instead of raw tensor variables.
419 changes: 419 additions & 0 deletions dandelion/LICENSE

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions dandelion/__init__.py
@@ -0,0 +1,10 @@
from . import module
from . import util
from . import initialization
from . import update
from . import activation
from . import objective
from . import functional

__version__ = "0.14.4"
__author__ = "David Leon (Dawei Leng)"
309 changes: 309 additions & 0 deletions dandelion/activation.py
@@ -0,0 +1,309 @@
# -*- coding: utf-8 -*-
"""
Non-linear activation functions for artificial neurons.
"""

import theano.tensor as tensor

#--- element-wise activations ---#
sigmoid = tensor.nnet.sigmoid
tanh = tensor.tanh
relu = tensor.nnet.relu
softplus = tensor.nnet.softplus
ultra_fast_sigmoid = tensor.nnet.ultra_fast_sigmoid

#--- row-wise activation ---#
def softmax(x):
"""
Apply softmax to the last dimension of input x.
:param x: input tensor
:return: tensor of the same shape as `x`, with softmax applied along the last dimension
"""
ndim = x.ndim
if ndim <= 2:
return tensor.nnet.softmax(x)
else:
original_shape = x.shape
M, N = 1, original_shape[-1]
for i in range(x.ndim - 1):
M = M * original_shape[i]
x = tensor.reshape(x, (M, N))
x = tensor.nnet.softmax(x)
x = tensor.reshape(x, original_shape)
return x
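
# A minimal usage sketch (illustrative only; the shapes and names below are
# assumptions, not part of the module):
#     >>> import numpy as np
#     >>> import theano
#     >>> x = tensor.tensor3('x')                            # e.g. (batch, time, n_class) scores
#     >>> f = theano.function([x], softmax(x), allow_input_downcast=True)
#     >>> y = f(np.random.rand(2, 3, 5))
#     >>> np.allclose(y.sum(axis=-1), 1.0)                   # last-axis slices sum to 1
#     True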

# scaled tanh
class ScaledTanH(object):
"""Scaled tanh :math:`\\varphi(x) = \\tanh(\\alpha \\cdot x) \\cdot \\beta`
This is a modified tanh function which allows rescaling both the input and
the output of the activation.
Scaling the input down will result in decreasing the maximum slope of the
tanh and as a result it will be in the linear regime in a larger interval
of the input space. Scaling the input up will increase the maximum slope
of the tanh and thus bring it closer to a step function.
Scaling the output changes the output interval to :math:`[-\\beta,\\beta]`.
Parameters
----------
scale_in : float32
The scale parameter :math:`\\alpha` for the input
scale_out : float32
The scale parameter :math:`\\beta` for the output
Methods
-------
__call__(x)
Apply the scaled tanh function to the activation `x`.
Examples
--------
In contrast to other activation functions in this module, this is
a class that needs to be instantiated to obtain a callable:
>>> from lasagne.layers import InputLayer, DenseLayer
>>> l_in = InputLayer((None, 100))
>>> from lasagne.nonlinearities import ScaledTanH
>>> scaled_tanh = ScaledTanH(scale_in=0.5, scale_out=2.27)
>>> l1 = DenseLayer(l_in, num_units=200, nonlinearity=scaled_tanh)
Notes
-----
LeCun et al. (in [1]_, Section 4.4) suggest ``scale_in=2./3`` and
``scale_out=1.7159``, which has :math:`\\varphi(\\pm 1) = \\pm 1`,
maximum second derivative at 1, and an effective gain close to 1.
By carefully matching :math:`\\alpha` and :math:`\\beta`, the nonlinearity
can also be tuned to preserve the mean and variance of its input:
* ``scale_in=0.5``, ``scale_out=2.4``: If the input is a random normal
variable, the output will have zero mean and unit variance.
* ``scale_in=1``, ``scale_out=1.6``: Same property, but with a smaller
linear regime in input space.
* ``scale_in=0.5``, ``scale_out=2.27``: If the input is a uniform random
variable, the output will have zero mean and unit variance.
* ``scale_in=1``, ``scale_out=1.48``: Same property, but with a smaller
linear regime in input space.
References
----------
.. [1] LeCun, Yann A., et al. (1998):
Efficient BackProp,
http://link.springer.com/chapter/10.1007/3-540-49430-8_2,
http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf
.. [2] Masci, Jonathan, et al. (2011):
Stacked Convolutional Auto-Encoders for Hierarchical Feature Extraction,
http://link.springer.com/chapter/10.1007/978-3-642-21735-7_7,
http://people.idsia.ch/~ciresan/data/icann2011.pdf
"""

def __init__(self, scale_in=1, scale_out=1):
self.scale_in = scale_in
self.scale_out = scale_out

def __call__(self, x):
return tensor.tanh(x * self.scale_in) * self.scale_out


ScaledTanh = ScaledTanH # alias with alternative capitalization
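
# A minimal usage sketch with the scaling suggested by LeCun et al. (illustrative only):
#     >>> scaled_tanh = ScaledTanH(scale_in=2. / 3, scale_out=1.7159)
#     >>> x = tensor.matrix('x')
#     >>> y = scaled_tanh(x)                  # 1.7159 * tanh(2/3 * x), so phi(+-1) ~= +-1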

# leaky rectify
class LeakyRectify(object):
"""Leaky rectifier :math:`\\varphi(x) = (x > 0)? x : \\alpha \\cdot x`
The leaky rectifier was introduced in [1]_. Compared to the standard
rectifier :func:`rectify`, it has a nonzero gradient for negative input,
which often helps convergence.
Parameters
----------
leakiness : float
Slope for negative input, usually between 0 and 1.
A leakiness of 0 will lead to the standard rectifier,
a leakiness of 1 will lead to a linear activation function,
and any value in between will give a leaky rectifier.
Methods
-------
__call__(x)
Apply the leaky rectify function to the activation `x`.
Examples
--------
In contrast to other activation functions in this module, this is
a class that needs to be instantiated to obtain a callable:
>>> from lasagne.layers import InputLayer, DenseLayer
>>> l_in = InputLayer((None, 100))
>>> from lasagne.nonlinearities import LeakyRectify
>>> custom_rectify = LeakyRectify(0.1)
>>> l1 = DenseLayer(l_in, num_units=200, nonlinearity=custom_rectify)
Alternatively, you can use the provided instance for leakiness=0.01:
>>> from lasagne.nonlinearities import leaky_rectify
>>> l2 = DenseLayer(l_in, num_units=200, nonlinearity=leaky_rectify)
Or the one for a high leakiness of 1/3:
>>> from lasagne.nonlinearities import very_leaky_rectify
>>> l3 = DenseLayer(l_in, num_units=200, nonlinearity=very_leaky_rectify)
See Also
--------
leaky_rectify: Instance with default leakiness of 0.01, as in [1]_.
very_leaky_rectify: Instance with high leakiness of 1/3, as in [2]_.
References
----------
.. [1] Maas et al. (2013):
Rectifier Nonlinearities Improve Neural Network Acoustic Models,
http://web.stanford.edu/~awni/papers/relu_hybrid_icml2013_final.pdf
.. [2] Graham, Benjamin (2014):
Spatially-sparse convolutional neural networks,
http://arxiv.org/abs/1409.6070
"""
def __init__(self, leakiness=0.01):
self.leakiness = leakiness

def __call__(self, x):
return tensor.nnet.relu(x, self.leakiness)


leaky_rectify = LeakyRectify() # shortcut with default leakiness
leaky_rectify.__doc__ = """leaky_rectify(x)
Instance of :class:`LeakyRectify` with leakiness :math:`\\alpha=0.01`
"""


very_leaky_rectify = LeakyRectify(1./3) # shortcut with high leakiness
very_leaky_rectify.__doc__ = """very_leaky_rectify(x)
Instance of :class:`LeakyRectify` with leakiness :math:`\\alpha=1/3`
"""


# elu
def elu(x):
"""Exponential Linear Unit :math:`\\varphi(x) = (x > 0) ? x : e^x - 1`
The Exponential Linear Unit (ELU) was introduced in [1]_. Compared to the
linear rectifier :func:`rectify`, it has a mean activation closer to zero
and nonzero gradient for negative input, which can help convergence.
Compared to the leaky rectifier :class:`LeakyRectify`, it saturates for
highly negative inputs.
Parameters
----------
x : float32
The activation (the summed, weighted input of a neuron).
Returns
-------
float32
The output of the exponential linear unit for the activation.
Notes
-----
In [1]_, an additional parameter :math:`\\alpha` controls the (negative)
saturation value for negative inputs, but is set to 1 for all experiments.
It is omitted here.
References
----------
.. [1] Djork-Arné Clevert, Thomas Unterthiner, Sepp Hochreiter (2015):
Fast and Accurate Deep Network Learning by Exponential Linear Units
(ELUs), http://arxiv.org/abs/1511.07289
"""
return tensor.switch(x > 0, x, tensor.expm1(x))
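
# A minimal numeric check (illustrative only):
#     >>> import numpy as np
#     >>> import theano
#     >>> x = tensor.vector('x')
#     >>> f = theano.function([x], elu(x), allow_input_downcast=True)
#     >>> f(np.array([-1.0, 0.0, 2.0]))       # approx. [exp(-1) - 1, 0, 2] = [-0.632, 0., 2.]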


# selu
class SELU(object):
"""
Scaled Exponential Linear Unit
:math:`\\varphi(x)=\\lambda \\left[(x>0) ? x : \\alpha(e^x-1)\\right]`
The Scaled Exponential Linear Unit (SELU) was introduced in [1]_
as an activation function that allows the construction of
self-normalizing neural networks.
Parameters
----------
scale : float32
The scale parameter :math:`\\lambda` for scaling all output.
scale_neg : float32
The scale parameter :math:`\\alpha`
for scaling output for nonpositive argument values.
Methods
-------
__call__(x)
Apply the SELU function to the activation `x`.
Examples
--------
In contrast to other activation functions in this module, this is
a class that needs to be instantiated to obtain a callable:
>>> from lasagne.layers import InputLayer, DenseLayer
>>> l_in = InputLayer((None, 100))
>>> from lasagne.nonlinearities import SELU
>>> selu = SELU(2, 3)
>>> l1 = DenseLayer(l_in, num_units=200, nonlinearity=selu)
See Also
--------
selu: Instance with :math:`\\alpha\\approx1.6733,\\lambda\\approx1.0507`
as used in [1]_.
References
----------
.. [1] Günter Klambauer et al. (2017):
Self-Normalizing Neural Networks,
https://arxiv.org/abs/1706.02515
"""
def __init__(self, scale=1, scale_neg=1):
self.scale = scale
self.scale_neg = scale_neg

def __call__(self, x):
return self.scale * tensor.switch(
x > 0.0,
x,
self.scale_neg * (tensor.expm1(x)))


selu = SELU(scale=1.0507009873554804934193349852946,
scale_neg=1.6732632423543772848170429916717)
selu.__doc__ = """selu(x)
Instance of :class:`SELU` with :math:`\\alpha\\approx 1.6733,
\\lambda\\approx 1.0507`
This has a stable and attracting fixed point of :math:`\\mu=0`,
:math:`\\sigma=1` under the assumptions of the
original paper on self-normalizing neural networks.
"""

def linear(x):
"""Linear activation function :math:`\\varphi(x) = x`
Parameters
----------
x : float32
The activation (the summed, weighted input of a neuron).
Returns
-------
float32
The output of the identity applied to the activation.
"""
return x

identity = linear
