# Source code for elektronn.net.convnet

# -*- coding: utf-8 -*-
# ELEKTRONN - Neural Network Toolkit
#
# Copyright (c) 2014 - now
# Max-Planck-Institute for Medical Research, Heidelberg, Germany
# Authors: Marius Killinger, Gregor Urban

print "Load ELEKTRONN Core"

import time, sys
import cPickle
import numpy as np
import theano
import theano.tensor as T

from elektronn.utils import pprinttime

import optimizer as opt
from perceptronlayer import PerceptronLayer, RecurrentLayer
from convlayer2d import ConvLayer2d
from convlayer3d import ConvLayer3d, AffinityLayer3d, MalisLayer


def _printOps(n):
    """
    Print a humanized representation of a large operation count.

    The count is scaled to the largest unit it reaches (Tera, Giga, Mega or
    kilo Ops); counts below 1000 are printed in plain "Ops". Nothing is
    returned.

    Parameters
    ----------
    n: int/float
      Number of operations (e.g. multiplications) to report
    """
    abbrevs = ((1000000000000, 'Tera Ops'),
               (1000000000, 'Giga Ops'),
               (1000000, 'Mega Ops'),
               (1000, 'kilo Ops'))
    # Explicit fallback instead of relying on the loop variables leaking out
    # of an exhausted loop (which labelled small counts as fractional kilo Ops)
    factor, suffix = 1, 'Ops'
    for unit_factor, unit_suffix in abbrevs:
        if n >= unit_factor:
            factor, suffix = unit_factor, unit_suffix
            break
    print('Computational Cost: %.1f %s' % (float(n) / factor, suffix))


class MixedConvNN(object):
    """
    Parameters
    ----------

    input_size: tuple
    Data shapes, excluding batch and channel (used to infer the dimensionality)
    input_depth: int/None
    Is None by default, which means non-image data (no conv layers allowed). Change to 1 for b/w, 3 for RGB and\
    4 for RGB-D images etc. For RNN this is the length of the time series.
    batch_size: int/None
    None for variable batch size
    enable_dropout:  Bool
    Turn on or off dropout
    recurrent:       Bool
    Support recurrent iterations along input depth/time
    dimension_calc: dimension calculator object

    Examples
    --------

    Note that image data must have at least 1 channel, e.g. a 2d image (1,x,y). 3d requires data in the
    format (z,ch,x,y). E.g. to create an isotropic 3d CNN with 5 channels (total input shape is (1,30,5,30,30)):

    >>> MixedConvNN((30,30,30), input_depth=5, batch_size=1)

    A non-convolutional MLP can be created as:

    >>> MixedConvNN((100,), input_depth=None, batch_size=2000)

    """

    def __init__(self,
                 input_size=None,
                 input_depth=None,
                 batch_size=None,
                 enable_dropout=False,
                 recurrent=False,
                 dimension_calc=None):
        """
        Initialise the (still empty) network and create its symbolic input.

        The arguments are described in the class docstring. ``input_size`` is
        mandatory although its default is None; a non-iterable value is treated
        as a 1-tuple. For more than one spatial dimension a missing
        ``input_depth`` is replaced by 1. No layers are created here.
        """
        assert input_size is not None
        self.layers = []  # [0] input layer ---> [-1] output layer
        self.poolings = []
        self.params = []

        self._output_layers = []  # Empty UNLESS layers are added with add_in_output_layers=True
        self._autoencoder_chains = []
        self._last_grads = []
        self.debug_functions = []
        self.debug_conv_output = []
        self.debug_gradients_function = []

        self.CG_timeline = []
        self.batch_size = batch_size
        self.n_lab = None
        self.input_shape = None
        self.patch_size = np.array(input_size)  # only the spatial part of input shape
        self.output_strides = None
        self.output_shape = None
        self.mfp_strides = None
        self.mfp_offsets = None
        self.dimension_calc = dimension_calc
        self.TotalForwardPassCost = 0  # number of multiplications done
        self.SGD_LR = theano.shared(np.float32(0.09))  # those 3 values are to be overwritten
        self.SGD_momentum = theano.shared(np.float32(0.9))
        self.global_weightdecay = theano.shared(np.float32(0))
        self._SGD_params = {'LR': self.SGD_LR, 'momentum': self.SGD_momentum}
        self._RPROP_params = {}
        self._CG_params = {}
        self._LBFGS_params = {}
        self._use_class_weights = False
        self._enable_dropout = enable_dropout
        self._recurrent = recurrent
        self._atleast_single_mfp = False
        self._y = None
        self._y_aux = []

        self.input_noise = None
        self.t_init = time.time()

        # Accept a scalar as shorthand for a 1d input size
        try:
            input_size = tuple(input_size, )
        except TypeError:  # not iterable
            input_size = (input_size, )
        input_dim = len(input_size)
        self.n_dim = input_dim
        assert input_dim in [
            1, 2, 3
        ], "MixedConvNN: input_dimension currently not supported"

        if input_dim > 1 and input_depth is None:
            input_depth = 1
            print("For image-like data no depth was specified, using depth=1")

        if recurrent:
            assert input_dim == 1
            self._x = T.ftensor3('x_rnn_input')
            if input_depth is not None:
                self.input_shape = (batch_size, input_depth, input_size[0])  # [batch, time, feat]
            else:
                self.input_shape = (batch_size, input_size[0])  # the input is repeated (see "iterations" in rnn layer)
        else:
            x_dim = input_dim + 1  # +1 because of leading batch dimension
            if input_depth is not None:  # For images there is always an additional channel dimension (even if it is 1)
                x_dim += 1
                self.input_shape = (batch_size, input_depth) + input_size
            else:  # For non image input / prohibits ConvLayers
                self.input_shape = (batch_size, ) + input_size

            # construct tensor of matching dimensionality
            self._x = T.TensorType('float32', (False,) * x_dim, name='x_cnn_input')()
            if input_dim == 3:  # strange order for theano 3dconv
                self.input_shape = (batch_size, input_size[0], input_depth, input_size[1], input_size[2])

        print('-' * 60)
        print("Input shape   =  %s ; This is a %s dimensional NN" % (self.input_shape, input_dim))
        # A fixed batch size allows fixing the full shape of the symbolic input
        if batch_size is not None:
            self._layer0_input = self._x.reshape(self.input_shape)
        else:
            self._layer0_input = self._x

        print('---')

    ############################################################################################################

[docs]    def addPerceptronLayer(self,
                           n_outputs=10,
                           activation_func='tanh',
                           enable_input_noise=False,
                           add_in_output_layers=False,
                           force_no_dropout=False,
                           W=None,
                           b=None):
        """
        Adds a Perceptron layer to the CNN.

        Normally the each layer creates its own set of randomly initialised neuron weights. To reuse the weights
        of another layer (weight sharing) use the arguments ``W`` and ``b`` an pass ``T.TensorVariable``.
        If ``W`` and ``b`` are numpy arrays own weights are initialised with these values.

        Parameters
        ----------

        n_outputs: int
          The size of this layer
        activation_func: string
          {tanh, relu, sigmoid, abs, linear, maxout <i>}
          Activation function
        enable_input_noise: Bool
          If True set 20% of input to 0 randomly (similar to dropout)
        force_no_dropout:   Bool
          Set True for last/output layer
        """
        layer_input_shape = self.input_shape if (
            self.layers == []) else self.layers[-1].output_shape
        layer_input = self._layer0_input if (
            self.layers == []) else self.layers[-1].output

        if len(layer_input_shape) > 2:  # input_dimension >= 2
            layer_input = layer_input.flatten(2)
            nin = (layer_input_shape[0], np.product(layer_input_shape[1:]))
        elif len(layer_input_shape) == 2:  # input_dimension = 1
            nin = layer_input_shape
        else:
            raise ValueError('Used invalid input dimension for Perceptron layer')

        input_noise = theano.shared(np.float32(0.2)) if enable_input_noise else None
        self.input_noise = input_noise if enable_input_noise else self.input_noise
        self._y = T.wvector('y_cnn_labels')
        layer = PerceptronLayer(
            input=layer_input,
            n_in=nin[1],
            n_out=n_outputs,
            batch_size=nin[0],
            enable_dropout=(self._enable_dropout and force_no_dropout == False),
            activation_func=activation_func,
            input_noise=input_noise,
            input_layer=self.layers[-1] if len(self.layers) > 0 else None,
            W=W,
            b=b)

        if add_in_output_layers:
            self._output_layers.append(layer)
        else:
            self.layers.append(layer)

        if self.batch_size is not None:
            num_multiplications = np.product(n_outputs) * np.product(layer_input_shape)
        else:
            num_multiplications = np.product(n_outputs) * np.product(layer_input_shape[1:])
        _printOps(num_multiplications)
        print '---'
        self.TotalForwardPassCost += num_multiplications

    ############################################################################################################

[docs]    def addConvLayer(self,
                     nof_filters=None,
                     filter_size=None,
                     pool_shape=2,
                     activation_func='tanh',
                     add_in_output_layers=False,
                     force_no_dropout=False,
                     use_fragment_pooling=False,
                     reshape=False,
                     is_last_layer=False,
                     layer_input_shape=None,
                     layer_input=None,
                     W=None,
                     b=None,
                     pooling_mode='max',
                     affinity=False):
        """
        Adds a convolutional layer to the CNN. The dimensionality is *automatically* inferred.

        Normally the inputs are automatically connected the the outputs of the last added layer. To connect to a
        different layer use ``layer_input_shape`` and ``layer_input`` arguments.

        Normally the each layer creates its own set of randomly initialised neuron weights. To reuse the weights
        of another layer (weight sharing) use the arguments ``W`` and ``b`` an pass ``T.TensorVariable``.
        If ``W`` and ``b`` are numpy arrays own weights are initialised with these values.

        Parameters
        ----------
        nof_filters:          int
          Number of feature maps
        filter_size:          int/tuple
          Size/shape of convolutional filters, xy/zxy, (scalars are automatically extended to the 2d or 3d)
        pool_shape:   int/tuple
          Size/shape of pool, xy/zxy, (scalars are automatically extended to the 2d or 3d)
        activation_func:      string
          {tanh, relu, sigmoid, abs, linear, maxout <i>}
          Activation function
        force_no_dropout:     Bool
          Set True for last/output layer
        use_fragment_pooling: Bool
         Set to True for predicting dense images efficiently. Requires batch_size==1.
        reshape:              Bool
          Set to True to get 2d/3d output instead of flattened class_probabilities in the last layer
        is_last_layer:        Bool
          Shorthand for reshape=True, force_no_dropout=True and reconstruction of pooling fragments (if mfp was active)
        layer_input_shape: tuple of int
          Only needed if layer_input is not not None
        layer_input: T.TensorVariable
          Symbolic input if you do *not* want to use the previous layer of the cnn. This requires specification of
          the shape of that input with ``layer_input_shape``.
        W: np.ndarray
          weight matrix. If array, the values are used to initialise a shared variable for this layer.
                               If TensorVariable, than this variable is directly used (weight sharing with the
                               layer from which this variable comes from)

        b: np.ndarray or T.TensorVariable
          bias vector. If array, the values are used to initialise a shared variable for this layer.
                               If TensorVariable, than this variable is directly used (weight sharing with the
                               layer from which this variable comes from)
        pooling_mode: str
          'max' or 'maxabs' where the first is normal maxpooling and the second also retains sign of large negative values
        """

        n_dim = self.n_dim
        assert n_dim in [2, 3], "only 2d and 3d convolution supported!"
        if not hasattr(filter_size, '__len__'):
            filter_size = (filter_size, ) * n_dim
        elif len(filter_size) == 1:
            filter_size = filter_size * n_dim
        elif len(filter_size) != n_dim:
            raise ValueError(
                'Filter size must be either scalar or have same length as n_dim')

        if not hasattr(pool_shape, '__len__'):
            pool_shape = (pool_shape, ) * n_dim
        elif len(pool_shape) == 1:
            pool_shape = pool_shape * n_dim

        self.poolings.append(pool_shape)

        if (layer_input_shape is None) and (layer_input is None):
            layer_input_shape = self.input_shape if len(self.layers) == 0 else self.layers[-1].output_shape
            layer_input = self._layer0_input if len(self.layers) == 0 else self.layers[-1].output
        else:
            assert (layer_input_shape is not None) and (layer_input is not None),\
                "Provide either both input and shape or neither"
        assert len(layer_input_shape) in [3,4,5],\
            "Please implement the stacking of a convLayer on top of PerceptronLayer (if this is your goal)"

        if is_last_layer:
            print "Last Layer, by default: no dropout and reshaped outputs"
            force_no_dropout = True
            reshape = True
            if self._atleast_single_mfp:
                use_fragment_pooling = True

        if use_fragment_pooling:
            if self.batch_size != 1:
                print("MFP is activated and batch_size is not 1")
                #raise ValueError("MFP is activated and batch_size is not 1")
                # self.batch_size = 1 doesn't help

                # if there is mfp in at least 1 layer the output must be reshaped
            self._atleast_single_mfp = use_fragment_pooling or self._atleast_single_mfp

        dropout = (self._enable_dropout and force_no_dropout == False)

        if n_dim == 2:
            filter_shape = (nof_filters, layer_input_shape[1], filter_size[0], filter_size[1])
            CL = ConvLayer2d
            if reshape:
                self._y = T.TensorType('int16', [False, False, False], name='y_cnn_labels')()

        if n_dim == 3:
            filter_shape = (nof_filters, filter_size[0], layer_input_shape[2], filter_size[1], filter_size[2])
            CL = ConvLayer3d

            if affinity:
                print "WARNING: hack for adding affinity layer / MALIS active"
                if affinity == 'malis':
                    CL = MalisLayer
                else:
                    CL = AffinityLayer3d

            if reshape:
                self._y = T.TensorType('int16', [False, False, False, False], name='y_cnn_labels')()

        layer = CL(
            layer_input,
            layer_input_shape,
            filter_shape,
            pool_shape,
            activation_func,
            dropout,
            use_fragment_pooling,
            reshape,
            self.mfp_offsets,
            self.mfp_strides,
            input_layer=self.layers[-1] if len(self.layers) > 0 else None,
            W=W,
            b=b,
            pooling_mode=pooling_mode)

        self.mfp_offsets = layer.mfp_offsets
        self.mfp_strides = layer.mfp_strides

        if add_in_output_layers:
            self._output_layers.append(layer)
        else:
            self.layers.append(layer)

        # Calculate computational cost
        if n_dim == 2:
            n_pos = ((layer_input_shape[2]+1-filter_size[0]) *\
                     (layer_input_shape[3]+1-filter_size[1]))
        if n_dim == 3:
            n_pos = ((layer_input_shape[1]+1-filter_size[0]) *\
                     (layer_input_shape[3]+1-filter_size[1]) *\
                     (layer_input_shape[4]+1-filter_size[2]))
        if self.batch_size is not None:
            num_multiplications = np.product(filter_size) * n_pos * nof_filters *\
                                  layer_input_shape[1 if n_dim==2 else 2] * layer_input_shape[0]
        else:
            num_multiplications = np.product(filter_size) * n_pos * nof_filters *\
                                  layer_input_shape[1 if n_dim==2 else 2] # Cost for 1 patch

        _printOps(num_multiplications)
        print "Param count:", layer.params[0].get_value().size, '+', layer.params[1].get_value().size, '=',\
              layer.params[0].get_value().size + layer.params[1].get_value().size
        print '---'
        self.TotalForwardPassCost += num_multiplications

    ############################################################################################################

[docs]    def addRecurrentLayer(self,
                          n_hid=None,
                          activation_func='tanh',
                          iterations=None):
        """
        Adds a recurrent layer (only possible for non-image input of format (batch, time, features))

        Parameters
        ----------

        n_hid: int
           Number of hidden units
        activation_func: string
          {tanh, relu, sigmoid, abs, linear}
        iterations: int
          If layer input is not time-like (iterable on axis 1) it can be broadcasted and
          iterated over for a fixed number of iterations
        """
        layer_input_shape = self.input_shape if (self.layers == []) else self.layers[-1].output_shape
        layer_input = self._layer0_input if (self.layers == []) else self.layers[-1].output
        # Padding of constant input
        if len(layer_input_shape) == 2:
            print "Recurrence with broadcasted input"
            assert isinstance(iterations, int)
            bs = layer_input_shape[0] if (layer_input_shape[0] is not None) else 1
            broadcaster = (bs, iterations, layer_input_shape[1])
            layer_input = layer_input.dimshuffle(0, 'x', 1) * T.ones(broadcaster, dtype='float32')
            layer_input_shape = (layer_input_shape[0], iterations, layer_input_shape[1])
        elif len(layer_input_shape) != 3:
            raise ValueError('Used invalid input dimension for Recurrent layer')

        nin = layer_input_shape  # [batch, time, features]

        self._y = T.wvector('y_cnn_labels')
        layer = RecurrentLayer(input=layer_input,
                               n_in=layer_input_shape[2],
                               n_hid=n_hid,
                               batch_size=layer_input_shape[0],
                               activation_func=activation_func)

        self.layers.append(layer)

        if self.batch_size is not None:
            num_multiplications = np.product(nin) * n_hid + nin[0] * nin[1] * n_hid**2
        else:
            num_multiplications = np.product(nin[1:]) * n_hid + nin[1] * n_hid**2
        _printOps(num_multiplications)
        print '---'
        self.TotalForwardPassCost += num_multiplications

    ############################################################################################################

[docs]    def addTiedAutoencoderChain(self,
                                n_layers=None,
                                force_no_dropout=False,
                                activation_func='tanh',
                                input_noise=0.3,
                                tie_W=True):
        """
        Creates connected layers to invert Perceptron layers. Input is assumed to come from the first layer.

        Parameters
        ----------

        n_layers: int
           Number of layers that will be added/inverted, (input < 0 means all)
        activation_func:      string
           {tanh, relu, sigmoid, abs, linear}
           Activation function
        force_no_dropout:     Bool
          set True for last/output layer
        input_noise:         Bool
          Noise rate that will be applied to the input of the first reconstructor
        tie_W:                Bool
          Whether to share weight of dual layer pairs
        """
        if not n_layers:  # Automatically find number of Layers if not specified
            n_layers = len(self.layers)
        assert 0 < n_layers <= len(self.layers), "Number of Autoencoder layers not possible"

        chain = [self.layers[n_layers - 1]]  # if n_layers = depth(NN), add last layer, if n_layers is smaller add
                                             # <n_layers>th layer (s.t. a MLP remains after the AE bzw. next to it))

        first = True
        for i in xrange(n_layers - 1, -1, - 1):  # Invert layers starting from the deepest layer
            n_outputs = self.layers[i].n_in  # Get n_out and Weights from mirror layer
            W = self.layers[i].W.T if tie_W else None
            n_inputs = chain[-1].output_shape[1]  # Get input from previous layer in chain
            # (the first in chain is the deepest layer in the normal Net)
            batch_size = chain[-1].output_shape[0]
            dropout = self._enable_dropout and not force_no_dropout
            noise = input_noise if first else None
            PLayer = PerceptronLayer(input=chain[-1].output,
                                     n_in=n_inputs,
                                     n_out=n_outputs,
                                     batch_size=batch_size,
                                     enable_dropout=dropout,
                                     activation_func=activation_func,
                                     W=W,
                                     input_noise=noise,
                                     input_layer=chain[-1])
            chain.append(PLayer)
            first = False

        self._autoencoder_chains.extend(chain[1:])  # only keep the newly added Layers

        if not tie_W:
            self.layers += chain[1:]

    ############################################################################################################

[docs]    def compileDebugFunctions(self, gradients=True):
        """
        Compiles the debug_functions which return the network activations / output. To use them compile them with
        this function. They by accessible as cnn.debug_functions (normal output), cnn.debug_conv_output,
        cnn.debug_gradients_function (if True).
        """
        if len(self.debug_functions) != 0:
            print "debug functions are not empty"
            return

        for lay in self.layers:
            self.debug_functions.append(theano.function([self._x], lay.output))
            try:  # This is the output before pooling etc.
                self.debug_conv_output.append(theano.function(
                    [self._x],
                    lay.conv_output,
                    on_unused_input='ignore'))
            except:
                pass
        if gradients:
            self.debug_gradients_function = opt.Optimizer(self).compileGradients()

    ############################################################################################################

[docs]    def compileOutputFunctions(self,
                               target='nll',
                               use_class_weights=False,
                               use_example_weights=False,
                               use_lazy_labels=False,
                               use_label_prop=False,
                               only_forward=False):
        """
        Compiles the output functions ``get_loss``, ``get_error``, ``class_probabilities`` and defines the
        gradient (which is not compiled)

        Parameters
        ----------

        target: string
         'nll'/'regression', regression has squared error and nll_masked allows training with
          lazy labels; this requires the auxiliary (*aux) masks.
        use_class_weights: Bool
          whether to use class weights for the error
        use_example_weights: Bool
          whether to use example weights for the error
        use_lazy_labels: Bool
          whether to use lazy labels; this requires the auxiliary (*aux) masks
        use_label_prop: Bool
          whether to activate label propagation on unlabelled (-1) examples
        only_forward: Bool
          This exlcudes the building of the gradient (faster)

        Defined functions:

        (They are accessible as methods of ``MixedConvNN``)

        get_loss: theano-function
          [data, labels(, *aux)] --> [loss, loss_instance]
        get_error: theano-function
          [data, labels(, *aux)] --> [loss, (error,) prediction] no error for regression
        class_probabilities: theano-function
          [data] --> [prediction]
        """
        print "GLOBAL"
        _printOps(self.TotalForwardPassCost)
        self.t_graph = time.time()

        for lay in self.layers:
            if lay.params != []:
                self.params.extend(lay.params[::-1])  # (b, W)

        if self._autoencoder_chains is not []:  # add thos layers, but not to the params
            self.layers.extend(self._autoencoder_chains)

        self.param_count = np.sum([np.prod(p.get_value().shape) for p in self.params])

        print "Total Count of trainable Parameters:", self.param_count
        print "Building Computational Graph took %.3f s" % (self.t_graph - self.t_init)
        pp_cw = "using class_weights" if use_class_weights else "using no class_weights"
        pp_ew = "using example_weights" if use_example_weights else "using no example_weights"
        pp_ll = "using lazy_labels" if use_lazy_labels else "using no lazy_labels"
        pp_lp = "label propagation active" if use_label_prop else "label propagation inactive"
        print "Compiling output functions for %s target:\n\t%s\n \t%s\n \t%s\n \t%s\n" % (
            target, pp_cw, pp_ew, pp_ll, pp_lp)

        if len(self._output_layers) != 0:
            print "Warning: <compileOutputFunctions> only applies to the LAST layer in self.layers \
        (and ignores elements of self._output_layers)"

        layer_last = self.layers[-1]
        self.output_shape = layer_last.output_shape
        if (len(layer_last.output_shape) == 2) or self.n_dim != 3:  #Perceptron layer or any other
            self.n_lab = layer_last.output_shape[1]
        else:
            self.n_lab = layer_last.output_shape[2]
        # Define Target functions
        if target == 'regression':
            n_dim_regression = len(layer_last.output_shape)
            if isinstance(layer_last, (ConvLayer2d, ConvLayer3d)):
                n_dim_regression -= 1 # spatial input has no channel...
                
            self._y = T.TensorType('float32', (False,) * n_dim_regression, name='y_cnn_regression_targets')()
            self._loss, self._loss_instance = layer_last.squared_distance(self._y)
            ret = [self._loss, T.sqrt(self._loss), layer_last.output]
            self.get_error = theano.function([self._x, self._y], ret)
            self.prediction = theano.function([self._x], layer_last.output)

        elif target == 'nll_mutiple_binary':
            self._y = T.wmatrix('y_nll_mutiple_binary_targets')
            if use_class_weights:
                class_weights = T.TensorType('float32', [False], name='class_weights')()
                self._y_aux.append(class_weights)
            else:
                class_weights = None

            self._loss, self._loss_instance = layer_last.nll_mutiple_binary(self._y, class_weights)

        elif target == 'nll_weak':
            if use_class_weights:
                class_weights = T.TensorType('float32', [False], name='class_weights')()
                self._y_aux.append(class_weights)
            else:
                class_weights = None

            self._loss, self._loss_instance = layer_last.NLL_weak(self._y, class_weights)

        elif target == 'affinity':
            self._y = T.TensorType('int16', (False,) * 5, name='y_cnn_affinity_targets')()
            if use_class_weights:
                class_weights = T.TensorType('float32', [False], name='class_weights')()
                self._y_aux.append(class_weights)
            else:
                class_weights = None

            self._loss, self._loss_instance = layer_last.NLL_affinity(self._y, class_weights)

        elif target == 'malis':
            self._y = T.TensorType('int16', (False,) * 5, name='y_cnn_affinity_targets')()
            self._y_aux.append(T.TensorType('int16', (False,) * 4, name='y_cnn_seg_gt')())
            if use_class_weights:
                class_weights = T.TensorType('float32', [False], name='class_weights')()
                self._y_aux.append(class_weights)
            else:
                class_weights = None

            ret = layer_last.NLL_Malis(self._y, self._y_aux[0])
            self._loss = ret[0]
            self._loss_instance = ret[0]
            self.malis_stats = theano.function([self._x, self._y, self._y_aux[0]], ret)

        elif target == 'nll':
            if use_lazy_labels:
                if not (isinstance(layer_last, ConvLayer2d) or isinstance(layer_last, ConvLayer3d)):
                    raise ValueError("Cannot use lazy labels for Percptron layer")

                mask1 = T.TensorType('int16', [False, False], name='mask_class_labeled')()
                self._y_aux.append(mask1)
                mask2 = T.TensorType('int16', [False, False], name='mask_class_not_present')()
                self._y_aux.append(mask2)
            else:
                mask1, mask2 = None, None

            if use_class_weights:
                class_weights = T.TensorType('float32', [False], name='class_weights')()
                self._y_aux.append(class_weights)
            else:
                class_weights = None

            if use_example_weights:
                example_weights = T.TensorType('float32', (False,) * (self._x.ndim - 1), name='example_weights')()
                self._y_aux.append(example_weights)
            else:
                example_weights = None

            if use_label_prop:
                label_prop_thresh = T.fscalar('label_prop_thresh')
                self._y_aux.append(label_prop_thresh)
            else:
                label_prop_thresh = None

            if use_lazy_labels:
                self._loss, self._loss_instance = layer_last.NLL(
                    self._y,
                    class_weights,
                    example_weights,
                    mask_class_labeled=mask1,
                    mask_class_not_present=mask2,
                    label_prop_thresh=label_prop_thresh)
            else:
                self._loss, self._loss_instance = layer_last.NLL(
                    self._y,
                    class_weights,
                    example_weights,
                    label_prop_thresh=label_prop_thresh)

                # aux is possibly [mask_class_labeled, mask_class_not_present, class_weights, label_prop_thresh]

                # For all targets except regression the predictions / accuracy
        if target != 'regression':
            ret = [self._loss, layer_last.errors(self._y), layer_last.class_prediction]
            if target == 'nll_mutiple_binary':
                ret = [self._loss, layer_last.errors_no_tn(self._y), layer_last.class_prediction]

            self.get_error = theano.function([self._x, self._y] + self._y_aux, ret)
            self.class_probabilities = theano.function([self._x], layer_last.class_probabilities)

        # create a list of symbolic gradients for all model parameters
        if not only_forward:
            self._gradients = T.grad(self._loss, self.params, disconnected_inputs="warn")
            self.get_loss = opt.Optimizer(self).get_loss

        if isinstance(layer_last, (ConvLayer2d, ConvLayer3d, AffinityLayer3d)):
            try:
                self.output_shape = layer_last.prob_shape
            except:
                pass

            self.output_strides = map(np.prod, zip(*self.poolings))
            if self.mfp_strides is not None:
                self.output_strides = np.divide(self.output_strides, self.mfp_strides)

        self.t_out = time.time()
        print " Compiling done  - in %.3f s!" % (self.t_out - self.t_graph)
        print '-' * 60
        print '-' * 60

    ############################################################################################################

[docs]    def resetMomenta(self):
        """Resets the trailing average of the gradient to sole current gradient"""
        print "CNN: resetting momenta"
        print '\t'.join([str(len(x)) for x in (self.params, self._last_grads)])
        for para, lg in zip(self.params, self._last_grads):
            sp = para.get_value().shape
            lg.set_value(np.zeros(sp, dtype='float32'), borrow=0)

        try:
            for para, rp in zip(self.params, self._RPROP_LRs, ):
                sp = para.get_value().shape
                rp.set_value(1e-3 * np.ones(sp, dtype='float32'), borrow=0)
        except:
            pass

[docs]    def randomizeWeights(self, reset_momenta=True):
        """Resets weights to random values (calls randomize_weights() on each layer)"""
        print "CNN: Randomizing weights"
        for lay in self.layers + self._output_layers:
            lay.randomizeWeights()
        if reset_momenta:
            self.resetMomenta()

    ############################################################################################################
    ### Controlling Training ###################################################################################
    ############################################################################################################

[docs]    def trainingStep(self, *args, **kwargs):
        """
        Perform one optimiser iteration.
        Optimizers can be chosen by the kwarg ``mode``. They are complied on demand (which may take a while) and cached

        **Signature**: cnn.trainingStep(data, label(, *aux)(,**kwargs))

        Parameters
        ----------

        data: float32 array
          input [bs, ch (, x, y)] or [bs, z, ch, x, y]
        labels: int16 array
          [bs,((z,)y,x)] if output is not flattened
        aux: int16 arrays
          (optional) auxiliary weights/masks/etc. Should be unpacked list
        kwargs:
          * mode: string
              ['SGD']: (default) Good if data set is big and redundant

              'RPROP': which does neither uses a fix learning rate nor the momentum-value.
              It is faster than SGD if you do full-batch Training and use NO dropout.
              Any source of noise leads to failure of convergence (at all).

              'CG': Good generalisation but requires large batches. Returns current loss always

              'LBFGS': http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin_l_bfgs_b.html


           * update_loss: Bool
               determine current loss *after* update step (e.g. needed for queue, but ``get_loss`` can also be\
               called explicitly)

        Returns
        -------
        loss: float32
          loss (nll or squared error)
        loss_instance: float32 array
          loss for individual batch examples/pixels
        time_per_step: float
          Time spent on the GPU per step
        """
        mode = kwargs.get('mode', 'SGD')
        param_var = None
        t0 = time.time()
        # Check if auxiliary arguments are ok
        if len(args) != (len(self._y_aux) + 2):
            raise ValueError("The number of auxiliary arguments for the NLL is not matching the compiled signature: "
                             "%s. Got %i auxiliary args." % (self._y_aux, len(args) - 2))

        if mode == 'SGD':
            if not hasattr(self, 'SGD'):
                self.SGD = opt.compileSGD(self._SGD_params, self)

            loss, loss_instance = self.SGD(*args)
            if kwargs.get('update_loss', False):
                loss, loss_instance = self.get_loss(*args)

        elif mode == 'RPROP':
            if not hasattr(self, 'RPROP'):
                self.RPROP = opt.compileRPROP(self._RPROP_params, self)

            loss, loss_instance = self.RPROP(*args)
            if kwargs.get('update_loss', False):
                loss, loss_instance = self.get_loss(*args)

        elif mode == 'CG':
            if not hasattr(self, 'CG'):
                self.CG = opt.compileCG(self._CG_params, self)

            loss, loss_instance = self.CG(*args)  # this already is updated loss

        elif mode == 'LBFGS':
            if not hasattr(self, 'LBFGS'):
                self.LBFGS = opt.compileLBFGS(self._LBFGS_params, self)

            loss = self.LBFGS(*args)  # this already is updated loss
            loss_instance = loss

        elif mode == 'Adam':
            if not hasattr(self, 'Adam'):
                self.Adam = opt.compileAdam(self._Adam_params, self)

            loss, loss_instance = self.Adam(*args)  # this already is updated loss

        else:
            print "No mode %s" % mode
            return 0, 0, 0

        t = (time.time() - t0) + 1e-10  # add some epsilon to ensure > 0
        return np.float32(loss), loss_instance, t  ### TODO remove again

[docs]    def setOptimizerParams(self,
                           SGD={},
                           CG={},
                           RPROP={},
                           LBFGS={},
                           Adam={},
                           weight_decay=0.0):
        """
        Initialise optimiser hyper-parameters prior to compilation. SGD, CG and LBFGS this can also be done during
        Training.

        ``weight_decay`` is global to all optimisers and
        is identical to a L2-penalty on the weights with the coefficient given by ``weight_decay``
        """
        if weight_decay == False:
            self.global_weightdecay.set_value(np.float32(0), borrow=False)
        else:
            self.global_weightdecay.set_value(np.float32(weight_decay), borrow=False)

        self.setSGDLR(SGD.get("LR", 0.001))
        self.setSGDMomentum(SGD.get("momentum", 0.9))

        self._RPROP_params = dict(penalty=0.35,
                                  gain=0.2,
                                  beta=0.7,
                                  initial_update_size=1e-4)
        self._RPROP_params.update(RPROP)

        self._CG_params = dict(n_steps=3,
                               alpha=0.35,
                               beta=0.7,
                               max_step=0.02,
                               min_step=8e-5,
                               only_descent=False,
                               show=False)
        self._CG_params.update(CG)

        self._LBFGS_params = dict(maxfun= 40,  # function evaluations
                                  maxiter= 4,  # iterations
                                  m= 10,       # maximum number of variable metric corrections
                                  factr= 1e2,  # factor of machine precision as termination criterion (haha!)
                                  pgtol= 1e-9, # projected gradient tolerance
                                  iprint= -1)  # set to 0 for direct printing of steps
        self._LBFGS_params.update(LBFGS)

        self._Adam_params = {}
        self._Adam_params.update(Adam)

        if hasattr(self, 'SSGD'):
            self.SSGD.updateOptimizerParams(SSGD)

        if hasattr(self, 'CG'):
            self.CG.updateOptimizerParams(CG)

        if hasattr(self, 'LBFGS'):
            self.LBFGS.updateOptimizerParams(LBFGS)

[docs]    def setSGDLR(self, value=0.09):
        self.SGD_LR.set_value(np.float32(value), borrow=False)

[docs]    def setSGDMomentum(self, value=0.9):
        self.SGD_momentum.set_value(np.float32(value), borrow=False)

[docs]    def setWeightDecay(self, value=0.0005):
        self.global_weightdecay.set_value(np.float32(value), borrow=False)

[docs]    def setDropoutRates(self, rates):
        """Assumes a vector/list/array as input, first entry <--> first layer (etc.)"""
        for lay, ra, i in zip(self.layers, rates, range(len(rates))):
            try:
                assert 1.0 >= np.float32(ra) >= 0, "Dropout rates must be [0,1]"
                lay.activation_noise.set_value(np.float32(ra))
                #print 'layer',i,'new noise rate:',np.float32(ra*100.0),'%'
            except:
                #print 'set_dropout_rates: Warning: Dropout not enabled in this layer'
                pass

[docs]    def getDropoutRates(self):
        """Returns list of dropout rates"""
        rates = []
        for lay in self.layers:
            try:
                rates.append(np.float32(lay.activation_noise.get_value()))
            except:
                pass
                #sys.excepthook(*sys.exc_info())
        return rates

    ############################################################################################################
    ### Utilities ##############################################################################################
    ############################################################################################################

    def _predictDenseTile(self, raw_img, out_arr, offset):
        """
        Parameters
        ----------
        raw_img: np.ndarray
          raw image (ch, x, y) or (z, ch, x, y)to be predicted
          The shape must be cnn.patch_size + cnn.output_strides - 1 (elwise)
        out_arr: np.ndarray
          The shape is cnn.patch_size + cnn.mfp_strides - floor(cnn.offset) - 1 (elwise)
        offsets: array / list
          The cnn offsets (only needed if cnn was initialised without a dimension calculator)

        Returns
        -------
        class_probabilities: np.ndarray
          prediction (n_lab, z, x, y)
          The shape is cnn.patch_size + cnn.mfp_strides - floor(cnn.offset) - 1 (elwise)

        """

        if np.all(np.equal(self.output_strides, 1)):
            if self.n_dim == 2:
                out_arr[:, 0] = self.class_probabilities(raw_img[None])[0]  # (ch,x,y)
            else:
                out_arr[:] = self.class_probabilities(raw_img[None])[0]  # (z,ch,x,y)

        else:
            for x_off in range(self.output_strides[-2]):
                for y_off in range(self.output_strides[-1]):
                    if self.n_dim == 2:
                        cut_img = raw_img[None, :, x_off:x_off + self.patch_size[0], y_off:y_off + self.patch_size[1]]

                        #prob = self.class_probabilities(cut_img)[0]
                        # insert prob(ch, x, y) into out_arr(ch,z,x,y)
                        out_arr[:, 0, x_off::self.output_strides[0], y_off::
                                self.output_strides[1]] = self.class_probabilities(cut_img)[0]

                    elif self.n_dim == 3:
                        for z_off in range(self.output_strides[0]):
                            cut_img = raw_img[None,z_off:z_off + self.patch_size[0], :,
                                              x_off:x_off + self.patch_size[1], y_off:y_off + self.patch_size[2]]

                            #prob = self.class_probabilities(cut_img)[0]
                            out_arr[:, z_off::self.output_strides[0], x_off::self.output_strides[1], y_off::
                                    self.output_strides[2]
                                   ] = self.class_probabilities(cut_img)[0]

        return out_arr

[docs]    def predictDense(self,
                     raw_img,
                     show_progress=True,
                     offset=None,
                     as_uint8=False,
                     pad_raw=False):
        """
        Core function that performs the inference

        raw_img : np.ndarray
          raw data in the format (ch, x, y(, z))
        show_progress: Bool
          Whether to print progress state
        offset: 2/3-tuple
          If the cnn has no dimension calculator object, this specifies the cnn offset.
        as_uint8: Bool
          Return class proabilites as uint8 image (scaled between 0 and 255!)
        pad_raw: Bool
          Whether to apply padding (by mirroring) to the raw input image
          in order to get predictions on the full imgae domain.
        """
        # WARNING: this code contains mixed orders of xyz and zxy! The raw_img is swapped later!

        # determine normalisation depending on int or float type
        if raw_img.dtype in [np.int, np.int8, np.int16, np.int32, np.uint32,
                             np.uint, np.uint8, np.uint16, np.uint32, np.uint32]:
            m = 255
        else:
            m = 1

        raw_img = np.ascontiguousarray(raw_img, dtype=np.float32) / m

        time_start = time.time()
        strip_z = False
        if len(raw_img.shape) == 3:
            strip_z = True
            raw_img = raw_img[..., None]  # add singleton z-channel
        if self.dimension_calc is not None:
            offset = np.floor(self.dimension_calc.offset).astype(np.int)
        else:
            assert offset is not None,"If the cnn has not been intialised with a dimension calculator object, you must pass the offset to this function explicitly"

            offset = np.floor(offset).astype(np.int)

        n_lab = self.n_lab
        cnn_out_sh = self.output_shape[2:]  # without batch size and channel/n_lab
        ps = self.patch_size
        strides = self.output_strides

        if self.n_dim == 2:
            cnn_out_sh = np.concatenate([[1, ], cnn_out_sh])
            ps = np.concatenate([[1, ], ps])
            strides = np.concatenate([[1, ], strides])
            offset = np.concatenate([[0, ], offset])

        if pad_raw:
            raw_img = np.pad(raw_img, [(0, 0), (offset[1], offset[1]), (offset[2], offset[2]), (offset[0], offset[0])],
                             mode='symmetric')

        raw_sh = raw_img.shape[1:]  # only spatial, not channels

        tile_sh = np.add(ps, strides) - 1  # zxy
        #prob_sh = np.array([ps[i]+strides[i]-1-2*offset[i] for i in xrange(3)]) # zxy
        prob_sh = np.multiply(cnn_out_sh, strides)
        prob_arr = np.zeros(np.concatenate([[self.n_lab, ], prob_sh]), dtype=np.float32)  # zxy

        pred_sh = np.array([raw_sh[0] - 2 * offset[1], raw_sh[1] - 2 * offset[2], raw_sh[2] - 2 * offset[0]])  # xyz
        if as_uint8:
            predictions = np.zeros(np.concatenate(([n_lab, ], pred_sh)), dtype=np.uint8)  # xyz
        else:
            predictions = np.zeros(np.concatenate(([n_lab, ], pred_sh)), dtype=np.float32)  # xyz

        if self._atleast_single_mfp and not np.all(np.equal(self.output_strides, 1)):
            raise NotImplementedError("If MFP is partially enabled, the dense prediction does not work atm")

        # Calculate number of tiles (in 3d: blocks) that need to be performed
        x_tiles = int(np.ceil(float(pred_sh[0]) / prob_sh[1]))
        y_tiles = int(np.ceil(float(pred_sh[1]) / prob_sh[2]))
        z_tiles = int(np.ceil(float(pred_sh[2]) / prob_sh[0]))
        total_nb_tiles = np.product([x_tiles, y_tiles, z_tiles])
        print "Predicting img", raw_img.shape, "in", total_nb_tiles, "Blocks:", (x_tiles, y_tiles, z_tiles)
        count = 0
        for x_t in range(x_tiles):
            for y_t in range(y_tiles):
                for z_t in range(z_tiles):
                    # For every z_tile a slice of thickness cnn_out_sh[2] is
                    # collected and then collectively written to the output_data
                    raw_tile = raw_img[:, x_t * prob_sh[1]:x_t * prob_sh[1] + tile_sh[1],
                                       y_t * prob_sh[2]:y_t * prob_sh[2] + tile_sh[2],
                                       z_t * prob_sh[0]:z_t * prob_sh[0] + tile_sh[0]]

                    this_is_end_tile = False if np.all(np.equal(raw_tile.shape[1:], np.roll(tile_sh, 2))) else True

                    if this_is_end_tile:  # requires 0-padding
                        right_pad = np.subtract(np.roll(tile_sh, 2), raw_tile.shape[1:])  # (ch,x,y,z)
                        right_pad = np.concatenate(([0, ], right_pad))  # for channel dimension
                        left_pad = np.zeros(raw_tile.ndim, dtype=np.int)
                        pad_with = list(zip(left_pad, right_pad))
                        raw_tile = np.pad(raw_tile, pad_with, mode='constant')

                    if self.n_dim == 2:
                        # slice from raw_tile(ch,x,y,z) --> (ch,x,y)
                        prob_arr = self._predictDenseTile(raw_tile[..., 0], prob_arr, offset)  # returns (ch,z=1,x,y)
                        prob = prob_arr[:, 0, :, :, None]  # (ch,z=1,x,y) -> (ch,x,y,z=1)
                    else:
                        raw_tile = np.transpose(raw_tile, (3, 0, 1, 2))  # (ch,x,y,z) -> (z,ch,x,y)
                        prob_arr = self._predictDenseTile(raw_tile, prob_arr, offset)
                        prob = np.transpose(prob_arr, (0, 2, 3, 1))  # (ch,z,x,y) -> (ch,x,y,z)

                    if this_is_end_tile:  # cut away padded range
                        prob = prob[:, :prob_sh[1] - right_pad[1],
                                    :prob_sh[2] - right_pad[2], :prob_sh[0] - right_pad[3]]

                    if as_uint8:
                        prob *= 255
                        prob = prob.astype(np.uint8)  # maybe not needed...

                    predictions[:, x_t * prob_sh[1]:(x_t + 1) * prob_sh[1], y_t * prob_sh[2]:(y_t + 1) * prob_sh[2],
                                z_t * prob_sh[0]:(z_t + 1) * prob_sh[0]] = prob

                    count += 1
                    if show_progress:
                        dtime = time.time() - time_start
                        progress = count * 100.0 / total_nb_tiles
                        estimate = dtime / progress * 100.
                        if progress <= 100:
                            dtime = pprinttime(dtime)
                            estimate = pprinttime(estimate)
                            sys.stdout.write('\rProgress: %.2f%% in %s; estimate: %s' % (progress, dtime, estimate))
                            sys.stdout.flush()

        sys.stdout.write(' - done\n'.decode("string_escape"))
        sys.stdout.flush()
        print "Inference speed: %.3f MB or MPix /s\n" %\
              (np.product(predictions.shape[1:]) * 1.0 / 1000000 / (time.time() - time_start))

        if strip_z: predictions = predictions[:, :, :, 0]

        return predictions

[docs]    def get_activities(self, data):
        n = len(self.layers)
        activities = []
        for layer, dbgf, i in zip(self.layers, self.debug_functions, range(n)):
            activities.append(dbgf(data))
        return activities

[docs]    def get_nonpooled_activities(self, data):
        n = len(self.layers)
        activities = []
        for layer, dbgf, i in zip(self.layers, self.debug_conv_output, range(n)):
            activities.append(dbgf(data))
        return activities

[docs]    def saveParameters(self, path='CNN.save', layers=None, show=True):
        """Saves parameters to file, that can be loaded by ``loadParameters``"""
        if show:
            print 'Saving params to file'
        f = open(path, 'w')
        if layers is None:
            n_lay = len(self.layers) - len(self._autoencoder_chains)  # exclude the AE chains (they have only shared W)
            layers = self.layers[:n_lay]

        shape_info = []
        for lay in layers:
            shape_info.append(lay.params[0].get_value(borrow=True).shape)

        if show:
            print ' shapes are: ' + str(shape_info)
        cPickle.dump(shape_info, f, protocol=2)

        for lay in layers:
            cPickle.dump(lay.params[0].get_value(borrow=True), f, protocol=2)
            cPickle.dump(lay.params[1].get_value(borrow=True), f, protocol=2)
            if len(lay.params) > 2:  # Recurrent Params
                cPickle.dump(lay.params[2].get_value(borrow=True), f, protocol=2)
                cPickle.dump(lay.params[3].get_value(borrow=True), f, protocol=2)

        cPickle.dump(self.poolings, f, protocol=2)  # list of all pooling factors
        f.close()

[docs]    def loadParameters(self, myfile="CNN.save", strict=False, n_layers_to_load=-1):
        """
        Loads parameters from file created by ``saveParameters``. The parameter shapes do not need to fit the CNN
        architecture, they "squeezed" or "padded" to fit.

        Additionally the momenta of the gradients are reset

        Parameters
        ----------

        myfile: string
          Path to file
        strict: bool
          If true, parameter shapes must fit exactly, this the only way to load RNN parameters
        n_layers_to_load: int
          Only the first x layers are initialised if this is not at its default value (-1)
        """
        self.resetMomenta()
        if strict:
            self._loadParametersStrict(myfile)
        else:
            self._loadParametersAdaptive(myfile, n_layers_to_load=n_layers_to_load)

    def _loadParametersAdaptive(self, myfile="CNN.save", n_layers_to_load=-1):
        """
        Load a parameter set which is NOT fully compatible to the current network configuration
        (e.g. different filter sizes, number of filters etc).

        detects if layers already are in correct shape
        """
        print "loading(adaptive) from", myfile
        try:
            f = open(myfile, 'r')
        except:
            print "CNN: ERROR: Cannot load file '", myfile, "'"

        shp = cPickle.load(f)
        print "Shapes of loaded file are:", shp
        print "Shapes of current Net are:", [lay.params[0].get_value(borrow=True).shape for lay in self.layers]
        if n_layers_to_load < 0:
            n_layers_to_load = len(shp)
        print "#Layers(sav) =", len(shp), "loading", n_layers_to_load

        print "#Layers(CNN) =", len(self.layers)

        for it, layer in enumerate(self.layers):
            if it == n_layers_to_load:
                break
            if it < len(shp):
                try:
                    p = cPickle.load(f)
                except:
                    print "Error! " * 7
                    print "LoadParametersAdaptive::ERROR: invalid file, cancelled after", it, "layers were loaded!"
                    e = sys.exc_info()[0]
                    print "<p>Error: %s</p>" % e
                    print "Error! " * 7
            else:
                p = [[[[]]]]
                print "debug missing, might crash now!"
            save_shape = np.shape(p)
            target_shape = layer.params[0].get_value(borrow=True).shape

            #load W
            if save_shape == target_shape:
                layer.params[0].set_value(p, borrow=False)
            elif len(target_shape)==len(save_shape) and len(save_shape)==4:
                #temp param of correct shape, weights with same variance as loaded parameters (mean=0)
                temp = np.float32(np.random.normal(0,0.02,target_shape))
                if (target_shape[0]>save_shape[0]):#need more filters than in save
                    for i in range(0,target_shape[0],save_shape[0]):
                        if target_shape[1]>save_shape[1]:
                            for j in range(0, target_shape[1], save_shape[1]):
                                temp[i:min(target_shape[0], save_shape[0] + i),
                                     j:min(target_shape[1], save_shape[1] + j), :min(target_shape[2], save_shape[2]),
                                     :min(target_shape[3], save_shape[3])
                                    ] = p[:(min(target_shape[0], save_shape[0] + i) - i),
                                          :min(target_shape[1] - j, save_shape[1]),
                                          :min(target_shape[2], save_shape[2]),
                                          :min(target_shape[3], save_shape[3])]
                        else:
                            temp[i:min(target_shape[0], save_shape[0] + i), :target_shape[1],
                                 :min(target_shape[2], save_shape[2]),
                                 :min(target_shape[3], save_shape[3])
                                 ] = p[:(min(target_shape[0], save_shape[0] + i) - i),
                                       :target_shape[1],
                                       :min(target_shape[2], save_shape[2]),
                                       :min(target_shape[3], save_shape[3])]

                else:
                    if target_shape[1] > save_shape[1]:
                        for j in range(0, target_shape[1], save_shape[1]):
                            temp[:min(target_shape[0], save_shape[0]), j:min(target_shape[1], save_shape[1] + j),
                                 :min(target_shape[2], save_shape[2]),
                                 :min(target_shape[3], save_shape[3])
                                ] = p[:min(target_shape[0], save_shape[0]),
                                      :min(target_shape[1] - j, save_shape[1]),
                                      :min(target_shape[2], save_shape[2]),
                                      :min(target_shape[3], save_shape[3])
                                     ] + np.random.rand(min(target_shape[0], save_shape[0]),
                                                        min(target_shape[1], save_shape[1]+j)-j,
                                                        min(target_shape[2], save_shape[2]),
                                                        min(target_shape[3], save_shape[3])
                                                        ) * 1e-4
                    else:
                        mid_offset= 0
                        if target_shape[1] < save_shape[1]:
                            mid_offset = int((save_shape[1] - target_shape[1]) / 2.)

                        temp[:min(target_shape[0],save_shape[0]),
                             :target_shape[1],
                             :min(target_shape[2],save_shape[2]),
                             :min(target_shape[3],save_shape[3])
                             ] = p[:min(target_shape[0], save_shape[0]),
                                   mid_offset:mid_offset + target_shape[1],
                                   :min(target_shape[2], save_shape[2]),
                                   :min(target_shape[3], save_shape[3])]

                layer.params[0].set_value(temp, borrow=False)

            elif len(target_shape) == len(save_shape) and len(save_shape) == 5:
                print "adapting 3D_net_filter..."
                #(64, 3, 32, 3, 3) #n. = 64, depth=32
                #print "fan-in correction factor =",n_params_ratio

                temp = np.float32(np.random.normal(0, np.std(p) + 1e-9, target_shape))  #/6.*n_params_ratio

                nf_start = 0
                nf_end = min(target_shape[0], save_shape[0])

                f_st = max(int((target_shape[1] - save_shape[1]) / 2.), 0)
                f_end = f_st + min(target_shape[1], save_shape[1])

                f_st_ = max(int((save_shape[1] - target_shape[1]) / 2.), 0)
                f_end_ = f_st_ + min(target_shape[1], save_shape[1])

                c_st = 0
                c_end = c_st + min(target_shape[2], save_shape[2])  #

                c_st_ = 0
                c_end_ = c_st_ + min(target_shape[2], save_shape[2])  #

                temp[nf_start:nf_end, f_st:f_end, c_st:c_end, f_st:f_end, f_st:f_end
                    ] = p[nf_start:nf_end, f_st_:f_end_, c_st_:c_end_, f_st_:f_end_, f_st_:f_end_]

                layer.params[0].set_value(temp, borrow=False)

            else:
                print "Load: skipping layer" + str(it + 1) + ("" if len(target_shape) == 4 else
                                                              "-            can't load differently shaped perceptron layers (atm)")
            #load b
            if it < len(shp):
                p = cPickle.load(f)
            else:
                p = [[[[]]]]
                print "debug missing"
            save_shape = np.shape(p)
            target_shape = layer.params[1].get_value(borrow=True).shape
            if target_shape[0] == save_shape[0]:
                layer.params[1].set_value(p, borrow=False)
            elif target_shape[0] < save_shape[0]:
                layer.params[1].set_value(p[:min(target_shape[0], save_shape[0])], borrow=False)
            else:
                temp = np.float32(np.random.uniform(1e-5 + (0.5 if layer.activation_func in ["sigmoid", "relu"] else 0),
                                                    1e-6, target_shape))
                for i in range(0, target_shape[0], save_shape[0]):
                    temp[i:min(target_shape[0], save_shape[0] + i)] = p[:min(target_shape[0] - i, save_shape[0])]
                layer.params[1].set_value(temp, borrow=False)
        f.close()
        print "loading complete"

        #function lacks error-handling
    def _loadParametersStrict(self, myfile="CNN.save"):
        """
        Load a parameter set which is **fully compatible** to
        the current network configuration (FAILS otherwise).
        """
        print "Loading from", myfile
        f = open(myfile, 'r')

        shp = cPickle.load(f)
        print "Shapes are:", shp
        print "#Layers =", len(shp)

        for layer in self.layers:
            p = cPickle.load(f)
            layer.params[0].set_value(p, borrow=False)
            p = cPickle.load(f)
            layer.params[1].set_value(p, borrow=False)
            if len(layer.params) == 4:
                p = cPickle.load(f)
                layer.params[2].set_value(p, borrow=False)
                p = cPickle.load(f)
                layer.params[3].set_value(p, borrow=False)

        f.close()

[docs]    def gradstats(self, *args, **kwargs):
        grads = self.debug_gradients_function(*args, **kwargs)
        print("Gradient statistics")
        for g in grads:
            print("shape=%s,\tmean=%f,\tstd=%f" %
                  (g.shape, np.mean(g), np.std(g)))

[docs]    def actstats(self, *args, **kwargs):
        acts = self.get_activities(*args)
        print("Activation statistics")
        for a in acts:
            print("shape=%s,\tmean=%f,\tstd=%f" %
                  (a.shape, np.mean(a), np.std(a)))

[docs]    def paramstats(self, *args, **kwargs):
        print("Parameters statistics")
        for p in self.params:
            p = p.get_value()
            print("shape=%s,\tmean=%f,\tstd=%f" %
                  (p.shape, np.mean(p), np.std(p)))