# -*- coding: utf-8 -*-
# ELEKTRONN - Neural Network Toolkit
#
# Copyright (c) 2014 - now
# Max-Planck-Institute for Medical Research, Heidelberg, Germany
# Authors: Marius Killinger, Gregor Urban
print "Load ELEKTRONN Core"
import time, sys
import cPickle
import numpy as np
import theano
import theano.tensor as T
from elektronn.utils import pprinttime
import optimizer as opt
from perceptronlayer import PerceptronLayer, RecurrentLayer
from convlayer2d import ConvLayer2d
from convlayer3d import ConvLayer3d, AffinityLayer3d, MalisLayer
def _printOps(n):
"""
Return a humanized string representation of a large number.
"""
abbrevs = ((1000000000000, 'Tera Ops'), (1000000000, 'Giga Ops'), (1000000, 'Mega Ops'), (1000, 'kilo Ops'))
for factor, suffix in abbrevs:
if n >= factor:
break
print 'Computational Cost: %.1f %s' % (float(n) / factor, suffix)
[docs]class MixedConvNN(object):
"""
Parameters
----------
input_size: tuple
Data shapes, excluding batch and channel (used to infer the dimensionality)
input_depth: int/None
Is None by default this means non-image data (no conv layers allowed). Change to 1 for b/w, 3 for RGB and\
4 for RGB-D images etc. For RNN this is the length of the time series.
batch_size: int/None
None for variable batch size
enable_dropout: Bool
Turn on or off dropout
recurrent: Bool
Support recurrent iterations along input depth/time
dimension_calc: dimension calculator object
Examples
--------
Note that image data must have at least 1 channel, e.g. a 2d image (1,x,y). 3d requires data in the
format (z,ch,x,y). E.g. to create an isotropic 3d CNN with 5 channels (total input shape is (1,30,5,30,30)):
>>> MixedConvNN((30,30,30), input_depth=5, batch_size=1)
A non-convolutional MLP can be created as:
>>> MixedConvNN((100,), input_depth=None, batch_size=2000)
"""
def __init__(self,
             input_size=None,
             input_depth=None,
             batch_size=None,
             enable_dropout=False,
             recurrent=False,
             dimension_calc=None):
    """
    Set up the empty network skeleton and create the symbolic input
    variable ``self._x`` with a dimensionality matching ``input_size``,
    ``input_depth`` and ``recurrent``. No layers are created here; they
    are added with the ``add*Layer`` methods.
    """
    assert input_size is not None
    self.layers = []  # [0] input layer ---> [-1] output layer
    self.poolings = []  # pool shape of each added conv layer (for output strides)
    self.params = []  # flat list of trainable shared variables (filled on compile)
    self._output_layers = []  # Empty UNLESS you use add sth explicitly
    self._autoencoder_chains = []
    self._last_grads = []
    self.debug_functions = []
    self.debug_conv_output = []
    self.debug_gradients_function = []
    self.CG_timeline = []
    self.batch_size = batch_size
    self.n_lab = None
    self.input_shape = None
    self.patch_size = np.array(input_size)  # only the spatial part of input shape
    self.output_strides = None
    self.output_shape = None
    self.mfp_strides = None
    self.mfp_offsets = None
    self.dimension_calc = dimension_calc
    self.TotalForwardPassCost = 0  # number of multiplications done
    # Optimiser hyper-parameters are theano shared variables so they can be
    # changed during training without recompiling.
    self.SGD_LR = theano.shared(np.float32(0.09))  # those 3 values are to be overwritten
    self.SGD_momentum = theano.shared(np.float32(0.9))
    self.global_weightdecay = theano.shared(np.float32(0))
    self._SGD_params = {'LR': self.SGD_LR, 'momentum': self.SGD_momentum}
    self._RPROP_params = {}
    self._CG_params = {}
    self._LBFGS_params = {}
    self._use_class_weights = False
    self._enable_dropout = enable_dropout
    self._recurrent = recurrent
    self._atleast_single_mfp = False  # True once any layer uses fragment pooling
    self._y = None  # symbolic label variable (created by the add*Layer methods)
    self._y_aux = []  # auxiliary symbolic inputs (masks, class weights, ...)
    self.input_noise = None
    self.t_init = time.time()
    # Normalise input_size to a tuple (wrap scalars).
    try:
        input_size = tuple(input_size, )
    except:
        input_size = (input_size, )
    input_dim = len(input_size)
    self.n_dim = input_dim
    assert input_dim in [
        1, 2, 3
    ], "MixedConvNN: input_dimension currently not supported"
    if input_dim > 1 and input_depth is None:
        input_depth = 1
        print "For image-like data no depth was specified, using depth=1"
    if recurrent:
        # RNN input: always a 3d float tensor, depth acts as the time axis.
        assert input_dim == 1
        self._x = T.ftensor3('x_rnn_input')
        if input_depth is not None:
            self.input_shape = (batch_size, input_depth, input_size[0])  # [batch, time, feat]
        else:
            self.input_shape = (batch_size, input_size[0])  # the input is repeated (see "iterations" in rnn layer)
    else:
        x_dim = input_dim + 1  # +1 because of leading batch dimension
        if input_depth is not None:  # For images there is always an additional channel dimension (even if it is 1)
            x_dim += 1
            self.input_shape = (batch_size, input_depth) + input_size
        else:  # For non image input / prohibits ConvLayers
            self.input_shape = (batch_size, ) + input_size
        # construct tensor of matching dimensionality
        self._x = T.TensorType('float32', (False,) * x_dim, name='x_cnn_input')()
        if input_dim == 3:  # strange order for theano 3dconv
            self.input_shape = (batch_size, input_size[0], input_depth, input_size[1], input_size[2])
    print '-' * 60
    print "Input shape = ", self.input_shape, "; This is a", input_dim, "dimensional NN"
    if batch_size is not None:
        # With a fixed batch size the input can be reshaped eagerly.
        self._layer0_input = self._x.reshape(self.input_shape)
    else:
        self._layer0_input = self._x
    print '---'
############################################################################################################
def addPerceptronLayer(self,
                       n_outputs=10,
                       activation_func='tanh',
                       enable_input_noise=False,
                       add_in_output_layers=False,
                       force_no_dropout=False,
                       W=None,
                       b=None):
    """
    Adds a Perceptron layer to the CNN.

    Normally each layer creates its own set of randomly initialised neuron
    weights. To reuse the weights of another layer (weight sharing) use the
    arguments ``W`` and ``b`` and pass ``T.TensorVariable``. If ``W`` and
    ``b`` are numpy arrays, own weights are initialised with these values.

    Parameters
    ----------
    n_outputs: int
        The size of this layer
    activation_func: string
        {tanh, relu, sigmoid, abs, linear, maxout <i>}
        Activation function
    enable_input_noise: Bool
        If True set 20% of input to 0 randomly (similar to dropout)
    add_in_output_layers: Bool
        If True register the layer in ``self._output_layers`` instead of the
        main ``self.layers`` chain.
    force_no_dropout: Bool
        Set True for last/output layer
    W, b: np.ndarray or T.TensorVariable
        Optional weight/bias initialisation or sharing (see above).
    """
    # Connect to the previous layer, or the network input for the first layer.
    layer_input_shape = self.input_shape if (
        self.layers == []) else self.layers[-1].output_shape
    layer_input = self._layer0_input if (
        self.layers == []) else self.layers[-1].output
    if len(layer_input_shape) > 2:  # input_dimension >= 2
        # Flatten channel/spatial axes into a single feature axis.
        layer_input = layer_input.flatten(2)
        nin = (layer_input_shape[0], np.product(layer_input_shape[1:]))
    elif len(layer_input_shape) == 2:  # input_dimension = 1
        nin = layer_input_shape
    else:
        raise ValueError('Used invalid input dimension for Perceptron layer')
    input_noise = theano.shared(np.float32(0.2)) if enable_input_noise else None
    self.input_noise = input_noise if enable_input_noise else self.input_noise
    # Flat (per-example) int16 labels for perceptron outputs.
    self._y = T.wvector('y_cnn_labels')
    layer = PerceptronLayer(
        input=layer_input,
        n_in=nin[1],
        n_out=n_outputs,
        batch_size=nin[0],
        enable_dropout=(self._enable_dropout and force_no_dropout == False),
        activation_func=activation_func,
        input_noise=input_noise,
        input_layer=self.layers[-1] if len(self.layers) > 0 else None,
        W=W,
        b=b)
    if add_in_output_layers:
        self._output_layers.append(layer)
    else:
        self.layers.append(layer)
    # Book-keeping of the forward-pass cost (number of multiplications).
    if self.batch_size is not None:
        num_multiplications = np.product(n_outputs) * np.product(layer_input_shape)
    else:
        num_multiplications = np.product(n_outputs) * np.product(layer_input_shape[1:])
    _printOps(num_multiplications)
    print '---'
    self.TotalForwardPassCost += num_multiplications
############################################################################################################
def addConvLayer(self,
                 nof_filters=None,
                 filter_size=None,
                 pool_shape=2,
                 activation_func='tanh',
                 add_in_output_layers=False,
                 force_no_dropout=False,
                 use_fragment_pooling=False,
                 reshape=False,
                 is_last_layer=False,
                 layer_input_shape=None,
                 layer_input=None,
                 W=None,
                 b=None,
                 pooling_mode='max',
                 affinity=False):
    """
    Adds a convolutional layer to the CNN. The dimensionality is *automatically* inferred.

    Normally the inputs are automatically connected to the outputs of the last added
    layer. To connect to a different layer use ``layer_input_shape`` and ``layer_input``
    arguments. Normally each layer creates its own set of randomly initialised neuron
    weights. To reuse the weights of another layer (weight sharing) use the arguments
    ``W`` and ``b`` and pass ``T.TensorVariable``. If ``W`` and ``b`` are numpy arrays,
    own weights are initialised with these values.

    Parameters
    ----------
    nof_filters: int
        Number of feature maps
    filter_size: int/tuple
        Size/shape of convolutional filters, xy/zxy, (scalars are automatically extended to the 2d or 3d)
    pool_shape: int/tuple
        Size/shape of pool, xy/zxy, (scalars are automatically extended to the 2d or 3d)
    activation_func: string
        {tanh, relu, sigmoid, abs, linear, maxout <i>}
        Activation function
    force_no_dropout: Bool
        Set True for last/output layer
    use_fragment_pooling: Bool
        Set to True for predicting dense images efficiently. Requires batch_size==1.
    reshape: Bool
        Set to True to get 2d/3d output instead of flattened class_probabilities in the last layer
    is_last_layer: Bool
        Shorthand for reshape=True, force_no_dropout=True and reconstruction of pooling fragments (if mfp was active)
    layer_input_shape: tuple of int
        Only needed if layer_input is not None
    layer_input: T.TensorVariable
        Symbolic input if you do *not* want to use the previous layer of the cnn. This requires
        specification of the shape of that input with ``layer_input_shape``.
    W: np.ndarray or T.TensorVariable
        weight matrix. If array, the values are used to initialise a shared variable for this layer.
        If TensorVariable, then this variable is directly used (weight sharing with the
        layer from which this variable comes from)
    b: np.ndarray or T.TensorVariable
        bias vector. If array, the values are used to initialise a shared variable for this layer.
        If TensorVariable, then this variable is directly used (weight sharing with the
        layer from which this variable comes from)
    pooling_mode: str
        'max' or 'maxabs' where the first is normal maxpooling and the second also retains sign of large negative values
    affinity: Bool/str
        If truthy, replace the 3d conv layer with an affinity layer; the value
        'malis' selects the MALIS layer instead.
    """
    n_dim = self.n_dim
    assert n_dim in [2, 3], "only 2d and 3d convolution supported!"
    # Broadcast scalar / length-1 filter and pool shapes to n_dim entries.
    if not hasattr(filter_size, '__len__'):
        filter_size = (filter_size, ) * n_dim
    elif len(filter_size) == 1:
        filter_size = filter_size * n_dim
    elif len(filter_size) != n_dim:
        raise ValueError(
            'Filter size must be either scalar or have same length as n_dim')
    if not hasattr(pool_shape, '__len__'):
        pool_shape = (pool_shape, ) * n_dim
    elif len(pool_shape) == 1:
        pool_shape = pool_shape * n_dim
    self.poolings.append(pool_shape)
    # Default to chaining onto the previous layer (or the network input).
    if (layer_input_shape is None) and (layer_input is None):
        layer_input_shape = self.input_shape if len(self.layers) == 0 else self.layers[-1].output_shape
        layer_input = self._layer0_input if len(self.layers) == 0 else self.layers[-1].output
    else:
        assert (layer_input_shape is not None) and (layer_input is not None),\
            "Provide either both input and shape or neither"
    assert len(layer_input_shape) in [3, 4, 5],\
        "Please implement the stacking of a convLayer on top of PerceptronLayer (if this is your goal)"
    if is_last_layer:
        print "Last Layer, by default: no dropout and reshaped outputs"
        force_no_dropout = True
        reshape = True
        if self._atleast_single_mfp:
            use_fragment_pooling = True
    if use_fragment_pooling:
        if self.batch_size != 1:
            print("MFP is activated and batch_size is not 1")
            #raise ValueError("MFP is activated and batch_size is not 1")
            # self.batch_size = 1 doesn't help
    # if there is mfp in at least 1 layer the output must be reshaped
    self._atleast_single_mfp = use_fragment_pooling or self._atleast_single_mfp
    dropout = (self._enable_dropout and force_no_dropout == False)
    if n_dim == 2:
        filter_shape = (nof_filters, layer_input_shape[1], filter_size[0], filter_size[1])
        CL = ConvLayer2d
        if reshape:
            # Spatial int16 labels (bs, x, y).
            self._y = T.TensorType('int16', [False, False, False], name='y_cnn_labels')()
    if n_dim == 3:
        # 3d filter layout matches the (bs, z, ch, x, y) input convention.
        filter_shape = (nof_filters, filter_size[0], layer_input_shape[2], filter_size[1], filter_size[2])
        CL = ConvLayer3d
        if affinity:
            print "WARNING: hack for adding affinity layer / MALIS active"
            if affinity == 'malis':
                CL = MalisLayer
            else:
                CL = AffinityLayer3d
        if reshape:
            # Spatial int16 labels (bs, z, x, y).
            self._y = T.TensorType('int16', [False, False, False, False], name='y_cnn_labels')()
    layer = CL(
        layer_input,
        layer_input_shape,
        filter_shape,
        pool_shape,
        activation_func,
        dropout,
        use_fragment_pooling,
        reshape,
        self.mfp_offsets,
        self.mfp_strides,
        input_layer=self.layers[-1] if len(self.layers) > 0 else None,
        W=W,
        b=b,
        pooling_mode=pooling_mode)
    # Fragment-pooling book-keeping is propagated through the layer chain.
    self.mfp_offsets = layer.mfp_offsets
    self.mfp_strides = layer.mfp_strides
    if add_in_output_layers:
        self._output_layers.append(layer)
    else:
        self.layers.append(layer)
    # Calculate computational cost
    if n_dim == 2:
        n_pos = ((layer_input_shape[2]+1-filter_size[0]) *\
                 (layer_input_shape[3]+1-filter_size[1]))
    if n_dim == 3:
        n_pos = ((layer_input_shape[1]+1-filter_size[0]) *\
                 (layer_input_shape[3]+1-filter_size[1]) *\
                 (layer_input_shape[4]+1-filter_size[2]))
    if self.batch_size is not None:
        num_multiplications = np.product(filter_size) * n_pos * nof_filters *\
            layer_input_shape[1 if n_dim==2 else 2] * layer_input_shape[0]
    else:
        num_multiplications = np.product(filter_size) * n_pos * nof_filters *\
            layer_input_shape[1 if n_dim==2 else 2]  # Cost for 1 patch
    _printOps(num_multiplications)
    print "Param count:", layer.params[0].get_value().size, '+', layer.params[1].get_value().size, '=',\
        layer.params[0].get_value().size + layer.params[1].get_value().size
    print '---'
    self.TotalForwardPassCost += num_multiplications
############################################################################################################
def addRecurrentLayer(self,
                      n_hid=None,
                      activation_func='tanh',
                      iterations=None):
    """
    Adds a recurrent layer (only possible for non-image input of format
    (batch, time, features)).

    Parameters
    ----------
    n_hid: int
        Number of hidden units
    activation_func: string
        {tanh, relu, sigmoid, abs, linear}
    iterations: int
        If layer input is not time-like (iterable on axis 1) it can be broadcasted and
        iterated over for a fixed number of iterations
    """
    layer_input_shape = self.input_shape if (self.layers == []) else self.layers[-1].output_shape
    layer_input = self._layer0_input if (self.layers == []) else self.layers[-1].output
    # Padding of constant input: a 2d (batch, feat) input is broadcast along a
    # new time axis so it can be iterated ``iterations`` times.
    if len(layer_input_shape) == 2:
        print "Recurrence with broadcasted input"
        assert isinstance(iterations, int)
        bs = layer_input_shape[0] if (layer_input_shape[0] is not None) else 1
        broadcaster = (bs, iterations, layer_input_shape[1])
        layer_input = layer_input.dimshuffle(0, 'x', 1) * T.ones(broadcaster, dtype='float32')
        layer_input_shape = (layer_input_shape[0], iterations, layer_input_shape[1])
    elif len(layer_input_shape) != 3:
        raise ValueError('Used invalid input dimension for Recurrent layer')
    nin = layer_input_shape  # [batch, time, features]
    self._y = T.wvector('y_cnn_labels')
    layer = RecurrentLayer(input=layer_input,
                           n_in=layer_input_shape[2],
                           n_hid=n_hid,
                           batch_size=layer_input_shape[0],
                           activation_func=activation_func)
    self.layers.append(layer)
    # Cost: input-to-hidden plus hidden-to-hidden multiplications.
    if self.batch_size is not None:
        num_multiplications = np.product(nin) * n_hid + nin[0] * nin[1] * n_hid**2
    else:
        num_multiplications = np.product(nin[1:]) * n_hid + nin[1] * n_hid**2
    _printOps(num_multiplications)
    print '---'
    self.TotalForwardPassCost += num_multiplications
############################################################################################################
def addTiedAutoencoderChain(self,
                            n_layers=None,
                            force_no_dropout=False,
                            activation_func='tanh',
                            input_noise=0.3,
                            tie_W=True):
    """
    Creates connected layers to invert Perceptron layers. Input is assumed to come from
    the first layer.

    Parameters
    ----------
    n_layers: int
        Number of layers that will be added/inverted, (input < 0 means all)
    force_no_dropout: Bool
        set True for last/output layer
    activation_func: string
        {tanh, relu, sigmoid, abs, linear}
        Activation function
    input_noise: float
        Noise rate that will be applied to the input of the first reconstructor
    tie_W: Bool
        Whether to share weight of dual layer pairs
    """
    if not n_layers:  # Automatically find number of Layers if not specified
        n_layers = len(self.layers)
    assert 0 < n_layers <= len(self.layers), "Number of Autoencoder layers not possible"
    chain = [self.layers[n_layers - 1]]  # if n_layers = depth(NN), add last layer, if n_layers is smaller add
    # <n_layers>th layer (s.t. a MLP remains after the AE bzw. next to it))
    first = True
    for i in xrange(n_layers - 1, -1, - 1):  # Invert layers starting from the deepest layer
        n_outputs = self.layers[i].n_in  # Get n_out and Weights from mirror layer
        # Tied weights: the decoder uses the transpose of the mirror layer's W.
        W = self.layers[i].W.T if tie_W else None
        n_inputs = chain[-1].output_shape[1]  # Get input from previous layer in chain
        # (the first in chain is the deepest layer in the normal Net)
        batch_size = chain[-1].output_shape[0]
        dropout = self._enable_dropout and not force_no_dropout
        # Only the first (deepest) reconstructor gets input noise.
        noise = input_noise if first else None
        PLayer = PerceptronLayer(input=chain[-1].output,
                                 n_in=n_inputs,
                                 n_out=n_outputs,
                                 batch_size=batch_size,
                                 enable_dropout=dropout,
                                 activation_func=activation_func,
                                 W=W,
                                 input_noise=noise,
                                 input_layer=chain[-1])
        chain.append(PLayer)
        first = False
    self._autoencoder_chains.extend(chain[1:])  # only keep the newly added Layers
    # Untied decoders have their own weights and must be trained directly.
    if not tie_W:
        self.layers += chain[1:]
############################################################################################################
[docs] def compileDebugFunctions(self, gradients=True):
"""
Compiles the debug_functions which return the network activations / output. To use them compile them with
this function. They by accessible as cnn.debug_functions (normal output), cnn.debug_conv_output,
cnn.debug_gradients_function (if True).
"""
if len(self.debug_functions) != 0:
print "debug functions are not empty"
return
for lay in self.layers:
self.debug_functions.append(theano.function([self._x], lay.output))
try: # This is the output before pooling etc.
self.debug_conv_output.append(theano.function(
[self._x],
lay.conv_output,
on_unused_input='ignore'))
except:
pass
if gradients:
self.debug_gradients_function = opt.Optimizer(self).compileGradients()
############################################################################################################
def compileOutputFunctions(self,
                           target='nll',
                           use_class_weights=False,
                           use_example_weights=False,
                           use_lazy_labels=False,
                           use_label_prop=False,
                           only_forward=False):
    """
    Compiles the output functions ``get_loss``, ``get_error``, ``class_probabilities``
    and defines the gradient (which is not compiled).

    Parameters
    ----------
    target: string
        'nll'/'regression'; regression has squared error, and nll_masked allows training
        with lazy labels; this requires the auxiliary (*aux) masks. Further supported:
        'nll_mutiple_binary', 'nll_weak', 'affinity', 'malis' (see branches below).
    use_class_weights: Bool
        whether to use class weights for the error
    use_example_weights: Bool
        whether to use example weights for the error
    use_lazy_labels: Bool
        whether to use lazy labels; this requires the auxiliary (*aux) masks
    use_label_prop: Bool
        whether to activate label propagation on unlabelled (-1) examples
    only_forward: Bool
        This excludes the building of the gradient (faster)

    Defined functions (accessible as methods of ``MixedConvNN``):

    get_loss: theano-function
        [data, labels(, *aux)] --> [loss, loss_instance]
    get_error: theano-function
        [data, labels(, *aux)] --> [loss, (error,) prediction] no error for regression
    class_probabilities: theano-function
        [data] --> [prediction]
    """
    print "GLOBAL"
    _printOps(self.TotalForwardPassCost)
    self.t_graph = time.time()
    # Collect trainable parameters of all layers; reversed slice yields (b, W).
    for lay in self.layers:
        if lay.params != []:
            self.params.extend(lay.params[::-1])  # (b, W)
    # NOTE(review): ``is not []`` compares identity, so this condition is
    # always True. Harmless here (extending with an empty list is a no-op),
    # but the intent was probably ``!= []``.
    if self._autoencoder_chains is not []:  # add those layers, but not to the params
        self.layers.extend(self._autoencoder_chains)
    self.param_count = np.sum([np.prod(p.get_value().shape) for p in self.params])
    print "Total Count of trainable Parameters:", self.param_count
    print "Building Computational Graph took %.3f s" % (self.t_graph - self.t_init)
    pp_cw = "using class_weights" if use_class_weights else "using no class_weights"
    pp_ew = "using example_weights" if use_example_weights else "using no example_weights"
    pp_ll = "using lazy_labels" if use_lazy_labels else "using no lazy_labels"
    pp_lp = "label propagation active" if use_label_prop else "label propagation inactive"
    print "Compiling output functions for %s target:\n\t%s\n \t%s\n \t%s\n \t%s\n" % (
        target, pp_cw, pp_ew, pp_ll, pp_lp)
    if len(self._output_layers) != 0:
        print "Warning: <compileOutputFunctions> only applies to the LAST layer in self.layers \
(and ignores elements of self._output_layers)"
    layer_last = self.layers[-1]
    self.output_shape = layer_last.output_shape
    # Index of the class/label axis differs between flat and spatial outputs.
    if (len(layer_last.output_shape) == 2) or self.n_dim != 3:  # Perceptron layer or any other
        self.n_lab = layer_last.output_shape[1]
    else:
        self.n_lab = layer_last.output_shape[2]
    # Define Target functions
    if target == 'regression':
        n_dim_regression = len(layer_last.output_shape)
        if isinstance(layer_last, (ConvLayer2d, ConvLayer3d)):
            n_dim_regression -= 1  # spatial input has no channel...
        self._y = T.TensorType('float32', (False,) * n_dim_regression, name='y_cnn_regression_targets')()
        self._loss, self._loss_instance = layer_last.squared_distance(self._y)
        ret = [self._loss, T.sqrt(self._loss), layer_last.output]
        self.get_error = theano.function([self._x, self._y], ret)
        self.prediction = theano.function([self._x], layer_last.output)
    elif target == 'nll_mutiple_binary':
        self._y = T.wmatrix('y_nll_mutiple_binary_targets')
        if use_class_weights:
            class_weights = T.TensorType('float32', [False], name='class_weights')()
            self._y_aux.append(class_weights)
        else:
            class_weights = None
        self._loss, self._loss_instance = layer_last.nll_mutiple_binary(self._y, class_weights)
    elif target == 'nll_weak':
        if use_class_weights:
            class_weights = T.TensorType('float32', [False], name='class_weights')()
            self._y_aux.append(class_weights)
        else:
            class_weights = None
        self._loss, self._loss_instance = layer_last.NLL_weak(self._y, class_weights)
    elif target == 'affinity':
        self._y = T.TensorType('int16', (False,) * 5, name='y_cnn_affinity_targets')()
        if use_class_weights:
            class_weights = T.TensorType('float32', [False], name='class_weights')()
            self._y_aux.append(class_weights)
        else:
            class_weights = None
        self._loss, self._loss_instance = layer_last.NLL_affinity(self._y, class_weights)
    elif target == 'malis':
        # MALIS additionally needs the ground-truth segmentation as aux input.
        self._y = T.TensorType('int16', (False,) * 5, name='y_cnn_affinity_targets')()
        self._y_aux.append(T.TensorType('int16', (False,) * 4, name='y_cnn_seg_gt')())
        if use_class_weights:
            class_weights = T.TensorType('float32', [False], name='class_weights')()
            self._y_aux.append(class_weights)
        else:
            class_weights = None
        ret = layer_last.NLL_Malis(self._y, self._y_aux[0])
        self._loss = ret[0]
        self._loss_instance = ret[0]
        self.malis_stats = theano.function([self._x, self._y, self._y_aux[0]], ret)
    elif target == 'nll':
        if use_lazy_labels:
            if not (isinstance(layer_last, ConvLayer2d) or isinstance(layer_last, ConvLayer3d)):
                raise ValueError("Cannot use lazy labels for Percptron layer")
            mask1 = T.TensorType('int16', [False, False], name='mask_class_labeled')()
            self._y_aux.append(mask1)
            mask2 = T.TensorType('int16', [False, False], name='mask_class_not_present')()
            self._y_aux.append(mask2)
        else:
            mask1, mask2 = None, None
        if use_class_weights:
            class_weights = T.TensorType('float32', [False], name='class_weights')()
            self._y_aux.append(class_weights)
        else:
            class_weights = None
        if use_example_weights:
            example_weights = T.TensorType('float32', (False,) * (self._x.ndim - 1), name='example_weights')()
            self._y_aux.append(example_weights)
        else:
            example_weights = None
        if use_label_prop:
            label_prop_thresh = T.fscalar('label_prop_thresh')
            self._y_aux.append(label_prop_thresh)
        else:
            label_prop_thresh = None
        if use_lazy_labels:
            self._loss, self._loss_instance = layer_last.NLL(
                self._y,
                class_weights,
                example_weights,
                mask_class_labeled=mask1,
                mask_class_not_present=mask2,
                label_prop_thresh=label_prop_thresh)
        else:
            self._loss, self._loss_instance = layer_last.NLL(
                self._y,
                class_weights,
                example_weights,
                label_prop_thresh=label_prop_thresh)
    # aux is possibly [mask_class_labeled, mask_class_not_present, class_weights, label_prop_thresh]
    # For all targets except regression the predictions / accuracy
    if target != 'regression':
        ret = [self._loss, layer_last.errors(self._y), layer_last.class_prediction]
        if target == 'nll_mutiple_binary':
            ret = [self._loss, layer_last.errors_no_tn(self._y), layer_last.class_prediction]
        self.get_error = theano.function([self._x, self._y] + self._y_aux, ret)
        self.class_probabilities = theano.function([self._x], layer_last.class_probabilities)
    # create a list of symbolic gradients for all model parameters
    if not only_forward:
        self._gradients = T.grad(self._loss, self.params, disconnected_inputs="warn")
    self.get_loss = opt.Optimizer(self).get_loss
    if isinstance(layer_last, (ConvLayer2d, ConvLayer3d, AffinityLayer3d)):
        try:
            self.output_shape = layer_last.prob_shape
        except:
            pass
        # Output stride per spatial axis = product of all pool shapes.
        self.output_strides = map(np.prod, zip(*self.poolings))
        if self.mfp_strides is not None:
            self.output_strides = np.divide(self.output_strides, self.mfp_strides)
    self.t_out = time.time()
    print " Compiling done - in %.3f s!" % (self.t_out - self.t_graph)
    print '-' * 60
    print '-' * 60
############################################################################################################
[docs] def resetMomenta(self):
"""Resets the trailing average of the gradient to sole current gradient"""
print "CNN: resetting momenta"
print '\t'.join([str(len(x)) for x in (self.params, self._last_grads)])
for para, lg in zip(self.params, self._last_grads):
sp = para.get_value().shape
lg.set_value(np.zeros(sp, dtype='float32'), borrow=0)
try:
for para, rp in zip(self.params, self._RPROP_LRs, ):
sp = para.get_value().shape
rp.set_value(1e-3 * np.ones(sp, dtype='float32'), borrow=0)
except:
pass
[docs] def randomizeWeights(self, reset_momenta=True):
"""Resets weights to random values (calls randomize_weights() on each layer)"""
print "CNN: Randomizing weights"
for lay in self.layers + self._output_layers:
lay.randomizeWeights()
if reset_momenta:
self.resetMomenta()
############################################################################################################
### Controlling Training ###################################################################################
############################################################################################################
def trainingStep(self, *args, **kwargs):
    """
    Perform one optimiser iteration.

    Optimizers can be chosen by the kwarg ``mode``. They are compiled on demand
    (which may take a while) and cached.

    **Signature**: cnn.trainingStep(data, label(, *aux)(,**kwargs))

    Parameters
    ----------
    data: float32 array
        input [bs, ch (, x, y)] or [bs, z, ch, x, y]
    labels: int16 array
        [bs,((z,)y,x)] if output is not flattened
    aux: int16 arrays
        (optional) auxiliary weights/masks/etc. Should be unpacked list
    kwargs:
        * mode: string
            ['SGD']: (default) Good if data set is big and redundant
            'RPROP': which does neither uses a fix learning rate nor the momentum-value.
            It is faster than SGD if you do full-batch Training and use NO dropout.
            Any source of noise leads to failure of convergence (at all).
            'CG': Good generalisation but requires large batches. Returns current loss always
            'LBFGS': http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin_l_bfgs_b.html
            'Adam': adaptive-moment SGD variant (compiled via opt.compileAdam;
            requires setOptimizerParams to have been called so that
            ``self._Adam_params`` exists)
        * update_loss: Bool
            determine current loss *after* update step (e.g. needed for queue, but ``get_loss``
            can also be called explicitly)

    Returns
    -------
    loss: float32
        loss (nll or squared error)
    loss_instance: float32 array
        loss for individual batch examples/pixels
    time_per_step: float
        Time spent on the GPU per step
    """
    mode = kwargs.get('mode', 'SGD')
    param_var = None  # NOTE(review): unused; presumably a leftover -- confirm before removing
    t0 = time.time()
    # Check if auxiliary arguments are ok
    if len(args) != (len(self._y_aux) + 2):
        raise ValueError("The number of auxiliary arguments for the NLL is not matching the compiled signature: "
                         "%s. Got %i auxiliary args." % (self._y_aux, len(args) - 2))
    # Each optimiser is compiled lazily on first use and cached on self.
    if mode == 'SGD':
        if not hasattr(self, 'SGD'):
            self.SGD = opt.compileSGD(self._SGD_params, self)
        loss, loss_instance = self.SGD(*args)
        if kwargs.get('update_loss', False):
            loss, loss_instance = self.get_loss(*args)
    elif mode == 'RPROP':
        if not hasattr(self, 'RPROP'):
            self.RPROP = opt.compileRPROP(self._RPROP_params, self)
        loss, loss_instance = self.RPROP(*args)
        if kwargs.get('update_loss', False):
            loss, loss_instance = self.get_loss(*args)
    elif mode == 'CG':
        if not hasattr(self, 'CG'):
            self.CG = opt.compileCG(self._CG_params, self)
        loss, loss_instance = self.CG(*args)  # this already is updated loss
    elif mode == 'LBFGS':
        if not hasattr(self, 'LBFGS'):
            self.LBFGS = opt.compileLBFGS(self._LBFGS_params, self)
        loss = self.LBFGS(*args)  # this already is updated loss
        loss_instance = loss
    elif mode == 'Adam':
        if not hasattr(self, 'Adam'):
            self.Adam = opt.compileAdam(self._Adam_params, self)
        loss, loss_instance = self.Adam(*args)  # this already is updated loss
    else:
        print "No mode %s" % mode
        return 0, 0, 0
    t = (time.time() - t0) + 1e-10  # add some epsilon to ensure > 0
    return np.float32(loss), loss_instance, t  ### TODO remove again
def setOptimizerParams(self,
                       SGD={},
                       CG={},
                       RPROP={},
                       LBFGS={},
                       Adam={},
                       weight_decay=0.0):
    """
    Initialise optimiser hyper-parameters prior to compilation. For SGD, CG and
    LBFGS this can also be done during Training.

    ``weight_decay`` is global to all optimisers and is identical to a
    L2-penalty on the weights with the coefficient given by ``weight_decay``.
    Passing ``False`` (or 0) disables the penalty.
    """
    # False and 0/0.0 compare equal here; both branches disable the penalty.
    if weight_decay == False:
        self.global_weightdecay.set_value(np.float32(0), borrow=False)
    else:
        self.global_weightdecay.set_value(np.float32(weight_decay), borrow=False)
    self.setSGDLR(SGD.get("LR", 0.001))
    self.setSGDMomentum(SGD.get("momentum", 0.9))
    # Defaults first, then user-supplied overrides.
    self._RPROP_params = dict(penalty=0.35,
                              gain=0.2,
                              beta=0.7,
                              initial_update_size=1e-4)
    self._RPROP_params.update(RPROP)
    self._CG_params = dict(n_steps=3,
                           alpha=0.35,
                           beta=0.7,
                           max_step=0.02,
                           min_step=8e-5,
                           only_descent=False,
                           show=False)
    self._CG_params.update(CG)
    self._LBFGS_params = dict(maxfun=40,    # function evaluations
                              maxiter=4,    # iterations
                              m=10,         # maximum number of variable metric corrections
                              factr=1e2,    # factor of machine precision as termination criterion (haha!)
                              pgtol=1e-9,   # projected gradient tolerance
                              iprint=-1)    # set to 0 for direct printing of steps
    self._LBFGS_params.update(LBFGS)
    self._Adam_params = {}
    self._Adam_params.update(Adam)
    # If optimisers were already compiled, push the new values to them.
    # BUGFIX: this branch previously read ``self.SSGD`` (an attribute that is
    # never set anywhere) and passed the undefined name ``SSGD`` -- a latent
    # NameError; the parameter is called ``SGD``.
    if hasattr(self, 'SSGD'):
        self.SSGD.updateOptimizerParams(SGD)
    if hasattr(self, 'CG'):
        self.CG.updateOptimizerParams(CG)
    if hasattr(self, 'LBFGS'):
        self.LBFGS.updateOptimizerParams(LBFGS)
def setSGDLR(self, value=0.09):
    """Set the SGD learning rate (shared variable, no recompilation needed)."""
    new_rate = np.float32(value)
    self.SGD_LR.set_value(new_rate, borrow=False)
def setSGDMomentum(self, value=0.9):
    """Set the SGD momentum (shared variable, no recompilation needed)."""
    new_momentum = np.float32(value)
    self.SGD_momentum.set_value(new_momentum, borrow=False)
def setWeightDecay(self, value=0.0005):
    """Set the global L2 weight-decay coefficient (shared variable)."""
    new_decay = np.float32(value)
    self.global_weightdecay.set_value(new_decay, borrow=False)
def setDropoutRates(self, rates):
    """Assumes a vector/list/array as input, first entry <--> first layer (etc.)

    Layers without dropout support are skipped silently. Rates outside [0,1]
    now raise ValueError; previously the range check was an ``assert`` inside
    a bare ``except`` and was therefore silently swallowed.
    """
    for i, (lay, ra) in enumerate(zip(self.layers, rates)):
        ra = np.float32(ra)
        if not (0 <= ra <= 1.0):
            raise ValueError("Dropout rates must be [0,1], got %r for layer %i" % (ra, i))
        try:
            lay.activation_noise.set_value(ra)
        except AttributeError:
            # Dropout not enabled in this layer
            pass
def getDropoutRates(self):
    """Returns list of dropout rates"""
    collected = []
    for layer in self.layers:
        # Layers without dropout raise here and are simply skipped.
        try:
            rate = np.float32(layer.activation_noise.get_value())
        except:
            continue
        collected.append(rate)
    return collected
############################################################################################################
### Utilities ##############################################################################################
############################################################################################################
def _predictDenseTile(self, raw_img, out_arr, offset):
    """
    Predict class probabilities for one tile of a dense prediction.

    Parameters
    ----------
    raw_img: np.ndarray
        raw image (ch, x, y) or (z, ch, x, y) to be predicted.
        The shape must be cnn.patch_size + cnn.output_strides - 1 (elwise)
    out_arr: np.ndarray
        The shape is cnn.patch_size + cnn.mfp_strides - floor(cnn.offset) - 1 (elwise)
    offset: array / list
        The cnn offsets (only needed if cnn was initialised without a dimension calculator)

    Returns
    -------
    class_probabilities: np.ndarray
        prediction (n_lab, z, x, y)
        The shape is cnn.patch_size + cnn.mfp_strides - floor(cnn.offset) - 1 (elwise)
    """
    if np.all(np.equal(self.output_strides, 1)):
        # Stride 1: a single forward pass covers every output position.
        if self.n_dim == 2:
            out_arr[:, 0] = self.class_probabilities(raw_img[None])[0]  # (ch,x,y)
        else:
            out_arr[:] = self.class_probabilities(raw_img[None])[0]  # (z,ch,x,y)
    else:
        # Strided output: predict shifted crops and interleave their results
        # into out_arr via strided slice assignment.
        for x_off in range(self.output_strides[-2]):
            for y_off in range(self.output_strides[-1]):
                if self.n_dim == 2:
                    cut_img = raw_img[None, :, x_off:x_off + self.patch_size[0], y_off:y_off + self.patch_size[1]]
                    #prob = self.class_probabilities(cut_img)[0]
                    # insert prob(ch, x, y) into out_arr(ch,z,x,y)
                    out_arr[:, 0, x_off::self.output_strides[0], y_off::
                            self.output_strides[1]] = self.class_probabilities(cut_img)[0]
                elif self.n_dim == 3:
                    for z_off in range(self.output_strides[0]):
                        cut_img = raw_img[None, z_off:z_off + self.patch_size[0], :,
                                          x_off:x_off + self.patch_size[1], y_off:y_off + self.patch_size[2]]
                        #prob = self.class_probabilities(cut_img)[0]
                        out_arr[:, z_off::self.output_strides[0], x_off::self.output_strides[1], y_off::
                                self.output_strides[2]
                                ] = self.class_probabilities(cut_img)[0]
    return out_arr
def predictDense(self,
                 raw_img,
                 show_progress=True,
                 offset=None,
                 as_uint8=False,
                 pad_raw=False):
    """
    Core function that performs the dense inference by tiling the image.

    Parameters
    ----------
    raw_img : np.ndarray
        raw data in the format (ch, x, y(, z))
    show_progress: Bool
        Whether to print progress state
    offset: 2/3-tuple
        If the cnn has no dimension calculator object, this specifies the cnn offset.
    as_uint8: Bool
        Return class probabilities as uint8 image (scaled between 0 and 255!)
    pad_raw: Bool
        Whether to apply padding (by mirroring) to the raw input image
        in order to get predictions on the full image domain.

    Returns
    -------
    np.ndarray
        Class probabilities (n_lab, x, y(, z)); dtype uint8 if ``as_uint8``.
    """
    # WARNING: this code contains mixed orders of xyz and zxy! The raw_img is swapped later!
    # determine normalisation depending on int or float type
    # NOTE(review): np.uint32 appears three times and np.int64/np.uint64 are
    # missing from this list -- 64-bit integer images would not be rescaled; verify.
    if raw_img.dtype in [np.int, np.int8, np.int16, np.int32, np.uint32,
                         np.uint, np.uint8, np.uint16, np.uint32, np.uint32]:
        m = 255
    else:
        m = 1
    raw_img = np.ascontiguousarray(raw_img, dtype=np.float32) / m
    time_start = time.time()
    strip_z = False
    if len(raw_img.shape) == 3:
        strip_z = True
        raw_img = raw_img[..., None]  # add singleton z-channel
    if self.dimension_calc is not None:
        offset = np.floor(self.dimension_calc.offset).astype(np.int)
    else:
        assert offset is not None,"If the cnn has not been intialised with a dimension calculator object, you must pass the offset to this function explicitly"
        offset = np.floor(offset).astype(np.int)
    n_lab = self.n_lab
    cnn_out_sh = self.output_shape[2:]  # without batch size and channel/n_lab
    ps = self.patch_size
    strides = self.output_strides
    if self.n_dim == 2:
        # Promote 2d shapes to 3d by prepending a singleton z-dimension so
        # the tiling loop below can treat both cases uniformly.
        cnn_out_sh = np.concatenate([[1, ], cnn_out_sh])
        ps = np.concatenate([[1, ], ps])
        strides = np.concatenate([[1, ], strides])
        offset = np.concatenate([[0, ], offset])
    if pad_raw:
        # Mirror-pad by the cnn offset so predictions cover the full image domain.
        raw_img = np.pad(raw_img, [(0, 0), (offset[1], offset[1]), (offset[2], offset[2]), (offset[0], offset[0])],
                         mode='symmetric')
    raw_sh = raw_img.shape[1:]  # only spatial, not channels
    tile_sh = np.add(ps, strides) - 1  # zxy
    #prob_sh = np.array([ps[i]+strides[i]-1-2*offset[i] for i in xrange(3)]) # zxy
    prob_sh = np.multiply(cnn_out_sh, strides)
    prob_arr = np.zeros(np.concatenate([[self.n_lab, ], prob_sh]), dtype=np.float32)  # zxy
    pred_sh = np.array([raw_sh[0] - 2 * offset[1], raw_sh[1] - 2 * offset[2], raw_sh[2] - 2 * offset[0]])  # xyz
    if as_uint8:
        predictions = np.zeros(np.concatenate(([n_lab, ], pred_sh)), dtype=np.uint8)  # xyz
    else:
        predictions = np.zeros(np.concatenate(([n_lab, ], pred_sh)), dtype=np.float32)  # xyz
    if self._atleast_single_mfp and not np.all(np.equal(self.output_strides, 1)):
        raise NotImplementedError("If MFP is partially enabled, the dense prediction does not work atm")
    # Calculate number of tiles (in 3d: blocks) that need to be performed
    x_tiles = int(np.ceil(float(pred_sh[0]) / prob_sh[1]))
    y_tiles = int(np.ceil(float(pred_sh[1]) / prob_sh[2]))
    z_tiles = int(np.ceil(float(pred_sh[2]) / prob_sh[0]))
    total_nb_tiles = np.product([x_tiles, y_tiles, z_tiles])
    print "Predicting img", raw_img.shape, "in", total_nb_tiles, "Blocks:", (x_tiles, y_tiles, z_tiles)
    count = 0
    for x_t in range(x_tiles):
        for y_t in range(y_tiles):
            for z_t in range(z_tiles):
                # For every z_tile a slice of thickness cnn_out_sh[2] is
                # collected and then collectively written to the output_data
                raw_tile = raw_img[:, x_t * prob_sh[1]:x_t * prob_sh[1] + tile_sh[1],
                                   y_t * prob_sh[2]:y_t * prob_sh[2] + tile_sh[2],
                                   z_t * prob_sh[0]:z_t * prob_sh[0] + tile_sh[0]]
                # Border tiles come out smaller than tile_sh and must be
                # zero-padded up to the fixed cnn input shape.
                this_is_end_tile = False if np.all(np.equal(raw_tile.shape[1:], np.roll(tile_sh, 2))) else True
                if this_is_end_tile:  # requires 0-padding
                    right_pad = np.subtract(np.roll(tile_sh, 2), raw_tile.shape[1:])  # (ch,x,y,z)
                    right_pad = np.concatenate(([0, ], right_pad))  # for channel dimension
                    left_pad = np.zeros(raw_tile.ndim, dtype=np.int)
                    pad_with = list(zip(left_pad, right_pad))
                    raw_tile = np.pad(raw_tile, pad_with, mode='constant')
                if self.n_dim == 2:
                    # slice from raw_tile(ch,x,y,z) --> (ch,x,y)
                    prob_arr = self._predictDenseTile(raw_tile[..., 0], prob_arr, offset)  # returns (ch,z=1,x,y)
                    prob = prob_arr[:, 0, :, :, None]  # (ch,z=1,x,y) -> (ch,x,y,z=1)
                else:
                    raw_tile = np.transpose(raw_tile, (3, 0, 1, 2))  # (ch,x,y,z) -> (z,ch,x,y)
                    prob_arr = self._predictDenseTile(raw_tile, prob_arr, offset)
                    prob = np.transpose(prob_arr, (0, 2, 3, 1))  # (ch,z,x,y) -> (ch,x,y,z)
                if this_is_end_tile:  # cut away padded range
                    prob = prob[:, :prob_sh[1] - right_pad[1],
                                :prob_sh[2] - right_pad[2], :prob_sh[0] - right_pad[3]]
                if as_uint8:
                    prob *= 255
                    prob = prob.astype(np.uint8)  # maybe not needed...
                predictions[:, x_t * prob_sh[1]:(x_t + 1) * prob_sh[1], y_t * prob_sh[2]:(y_t + 1) * prob_sh[2],
                            z_t * prob_sh[0]:(z_t + 1) * prob_sh[0]] = prob
                count += 1
                if show_progress:
                    dtime = time.time() - time_start
                    progress = count * 100.0 / total_nb_tiles
                    # Linear extrapolation of the remaining runtime.
                    estimate = dtime / progress * 100.
                    # NOTE(review): progress <= 100 is always true here, so the
                    # pretty-printing branch is unconditional in practice -- verify intent.
                    if progress <= 100:
                        dtime = pprinttime(dtime)
                        estimate = pprinttime(estimate)
                    sys.stdout.write('\rProgress: %.2f%% in %s; estimate: %s' % (progress, dtime, estimate))
                    sys.stdout.flush()
    # NOTE(review): str.decode("string_escape") is Python-2-only.
    sys.stdout.write(' - done\n'.decode("string_escape"))
    sys.stdout.flush()
    print "Inference speed: %.3f MB or MPix /s\n" %\
        (np.product(predictions.shape[1:]) * 1.0 / 1000000 / (time.time() - time_start))
    if strip_z: predictions = predictions[:, :, :, 0]  # drop the singleton z added above
    return predictions
def get_activities(self, data):
    """
    Return the activation of every layer for the given input.

    Parameters
    ----------
    data: np.ndarray
        Network input in the shape expected by the compiled debug functions.

    Returns
    -------
    list
        One activation array per layer, in layer order (paired with
        ``self.debug_functions``).
    """
    # zip truncates to the shorter of the two sequences, exactly like the
    # former explicit loop; the unused layer/index variables were dropped.
    return [dbgf(data) for _, dbgf in zip(self.layers, self.debug_functions)]
def get_nonpooled_activities(self, data):
    """
    Return the pre-pooling (conv output) activation of every layer.

    Parameters
    ----------
    data: np.ndarray
        Network input in the shape expected by the compiled debug functions.

    Returns
    -------
    list
        One non-pooled activation array per layer, in layer order (paired
        with ``self.debug_conv_output``).
    """
    # zip truncates to the shorter of the two sequences, exactly like the
    # former explicit loop; the unused layer/index variables were dropped.
    return [dbgf(data) for _, dbgf in zip(self.layers, self.debug_conv_output)]
def saveParameters(self, path='CNN.save', layers=None, show=True):
    """
    Save layer parameters to a pickle file loadable by ``loadParameters``.

    Parameters
    ----------
    path: string
        Output file path.
    layers: list/None
        Layers whose parameters are saved; defaults to all layers except
        the auto-encoder chains (they have only shared/tied W).
    show: Bool
        Whether to print progress information.
    """
    if show:
        print('Saving params to file')
    if layers is None:
        n_lay = len(self.layers) - len(self._autoencoder_chains)  # exclude the AE chains (they have only shared W)
        layers = self.layers[:n_lay]
    shape_info = [lay.params[0].get_value(borrow=True).shape for lay in layers]
    if show:
        print(' shapes are: ' + str(shape_info))
    # Pickle protocol 2 is a binary format --> open in 'wb'
    # (the previous text mode 'w' corrupts the stream on Windows).
    f = open(path, 'wb')
    try:
        cPickle.dump(shape_info, f, protocol=2)
        for lay in layers:
            cPickle.dump(lay.params[0].get_value(borrow=True), f, protocol=2)
            cPickle.dump(lay.params[1].get_value(borrow=True), f, protocol=2)
            if len(lay.params) > 2:  # Recurrent Params
                cPickle.dump(lay.params[2].get_value(borrow=True), f, protocol=2)
                cPickle.dump(lay.params[3].get_value(borrow=True), f, protocol=2)
        cPickle.dump(self.poolings, f, protocol=2)  # list of all pooling factors
    finally:
        # Close the handle even if a dump fails (previously leaked on error).
        f.close()
def loadParameters(self, myfile="CNN.save", strict=False, n_layers_to_load=-1):
    """
    Load parameters from a file created by ``saveParameters``.

    The parameter shapes do not need to fit the CNN architecture: unless
    ``strict`` is set, they are "squeezed" or "padded" to fit. The momenta
    of the gradients are reset in either case.

    Parameters
    ----------
    myfile: string
        Path to file.
    strict: bool
        If true, parameter shapes must fit exactly; this is the only way
        to load RNN parameters.
    n_layers_to_load: int
        Only the first x layers are initialised if this is not at its
        default value (-1).
    """
    self.resetMomenta()
    if not strict:
        self._loadParametersAdaptive(myfile, n_layers_to_load=n_layers_to_load)
    else:
        self._loadParametersStrict(myfile)
def _loadParametersAdaptive(self, myfile="CNN.save", n_layers_to_load=-1):
    """
    Load a parameter set which is NOT fully compatible to the current network configuration
    (e.g. different filter sizes, number of filters etc).
    Detects if layers already are in correct shape; otherwise weights are
    tiled/cropped into a randomly initialised array of the target shape.
    """
    print "loading(adaptive) from", myfile
    try:
        f = open(myfile, 'r')
    except:
        # NOTE(review): if open() fails, f stays unbound and the next line
        # raises NameError -- the error message above is all the user sees.
        print "CNN: ERROR: Cannot load file '", myfile, "'"
    shp = cPickle.load(f)
    print "Shapes of loaded file are:", shp
    print "Shapes of current Net are:", [lay.params[0].get_value(borrow=True).shape for lay in self.layers]
    if n_layers_to_load < 0:
        n_layers_to_load = len(shp)
    print "#Layers(sav) =", len(shp), "loading", n_layers_to_load
    print "#Layers(CNN) =", len(self.layers)
    for it, layer in enumerate(self.layers):
        if it == n_layers_to_load:
            break
        if it < len(shp):
            try:
                p = cPickle.load(f)
            except:
                # NOTE(review): after this except, p may be unbound (first
                # iteration) or stale (later ones); execution continues anyway.
                print "Error! " * 7
                print "LoadParametersAdaptive::ERROR: invalid file, cancelled after", it, "layers were loaded!"
                e = sys.exc_info()[0]
                print "<p>Error: %s</p>" % e
                print "Error! " * 7
        else:
            # More layers in the net than in the save file: dummy placeholder.
            p = [[[[]]]]
            print "debug missing, might crash now!"
        save_shape = np.shape(p)
        target_shape = layer.params[0].get_value(borrow=True).shape
        #load W
        if save_shape == target_shape:
            # Shapes match exactly --> copy verbatim.
            layer.params[0].set_value(p, borrow=False)
        elif len(target_shape)==len(save_shape) and len(save_shape)==4:
            # 2d conv weights (n_filt, n_ch, x, y) with mismatching shape:
            #temp param of correct shape, weights with same variance as loaded parameters (mean=0)
            temp = np.float32(np.random.normal(0,0.02,target_shape))
            if (target_shape[0]>save_shape[0]):#need more filters than in save
                # Tile the saved filters along the filter axis until full.
                for i in range(0,target_shape[0],save_shape[0]):
                    if target_shape[1]>save_shape[1]:
                        # Also tile along the channel axis.
                        for j in range(0, target_shape[1], save_shape[1]):
                            temp[i:min(target_shape[0], save_shape[0] + i),
                                 j:min(target_shape[1], save_shape[1] + j), :min(target_shape[2], save_shape[2]),
                                 :min(target_shape[3], save_shape[3])
                                 ] = p[:(min(target_shape[0], save_shape[0] + i) - i),
                                       :min(target_shape[1] - j, save_shape[1]),
                                       :min(target_shape[2], save_shape[2]),
                                       :min(target_shape[3], save_shape[3])]
                    else:
                        temp[i:min(target_shape[0], save_shape[0] + i), :target_shape[1],
                             :min(target_shape[2], save_shape[2]),
                             :min(target_shape[3], save_shape[3])
                             ] = p[:(min(target_shape[0], save_shape[0] + i) - i),
                                   :target_shape[1],
                                   :min(target_shape[2], save_shape[2]),
                                   :min(target_shape[3], save_shape[3])]
            else:
                if target_shape[1] > save_shape[1]:
                    # Tile along the channel axis only; add tiny noise so the
                    # repeated copies are not exactly identical.
                    for j in range(0, target_shape[1], save_shape[1]):
                        temp[:min(target_shape[0], save_shape[0]), j:min(target_shape[1], save_shape[1] + j),
                             :min(target_shape[2], save_shape[2]),
                             :min(target_shape[3], save_shape[3])
                             ] = p[:min(target_shape[0], save_shape[0]),
                                   :min(target_shape[1] - j, save_shape[1]),
                                   :min(target_shape[2], save_shape[2]),
                                   :min(target_shape[3], save_shape[3])
                                   ] + np.random.rand(min(target_shape[0], save_shape[0]),
                                                      min(target_shape[1], save_shape[1]+j)-j,
                                                      min(target_shape[2], save_shape[2]),
                                                      min(target_shape[3], save_shape[3])
                                                      ) * 1e-4
                else:
                    # Fewer/equal channels: crop a centred channel window.
                    mid_offset= 0
                    if target_shape[1] < save_shape[1]:
                        mid_offset = int((save_shape[1] - target_shape[1]) / 2.)
                    temp[:min(target_shape[0],save_shape[0]),
                         :target_shape[1],
                         :min(target_shape[2],save_shape[2]),
                         :min(target_shape[3],save_shape[3])
                         ] = p[:min(target_shape[0], save_shape[0]),
                               mid_offset:mid_offset + target_shape[1],
                               :min(target_shape[2], save_shape[2]),
                               :min(target_shape[3], save_shape[3])]
            layer.params[0].set_value(temp, borrow=False)
        elif len(target_shape) == len(save_shape) and len(save_shape) == 5:
            # 3d conv weights with mismatching shape: copy a centred crop of
            # the saved weights into noise of matching std.
            print "adapting 3D_net_filter..."
            #(64, 3, 32, 3, 3) #n. = 64, depth=32
            #print "fan-in correction factor =",n_params_ratio
            temp = np.float32(np.random.normal(0, np.std(p) + 1e-9, target_shape)) #/6.*n_params_ratio
            nf_start = 0
            nf_end = min(target_shape[0], save_shape[0])
            # Centred filter window in target (f_st) and save (f_st_) coordinates.
            f_st = max(int((target_shape[1] - save_shape[1]) / 2.), 0)
            f_end = f_st + min(target_shape[1], save_shape[1])
            f_st_ = max(int((save_shape[1] - target_shape[1]) / 2.), 0)
            f_end_ = f_st_ + min(target_shape[1], save_shape[1])
            c_st = 0
            c_end = c_st + min(target_shape[2], save_shape[2])  #
            c_st_ = 0
            c_end_ = c_st_ + min(target_shape[2], save_shape[2])  #
            # NOTE(review): the filter window f_st:f_end is reused for the two
            # trailing spatial axes as well -- assumes cubic filters; verify.
            temp[nf_start:nf_end, f_st:f_end, c_st:c_end, f_st:f_end, f_st:f_end
                 ] = p[nf_start:nf_end, f_st_:f_end_, c_st_:c_end_, f_st_:f_end_, f_st_:f_end_]
            layer.params[0].set_value(temp, borrow=False)
        else:
            print "Load: skipping layer" + str(it + 1) + ("" if len(target_shape) == 4 else
                                                          "- can't load differently shaped perceptron layers (atm)")
        #load b
        if it < len(shp):
            p = cPickle.load(f)
        else:
            p = [[[[]]]]
            print "debug missing"
        save_shape = np.shape(p)
        target_shape = layer.params[1].get_value(borrow=True).shape
        if target_shape[0] == save_shape[0]:
            layer.params[1].set_value(p, borrow=False)
        elif target_shape[0] < save_shape[0]:
            # Fewer biases needed: truncate.
            layer.params[1].set_value(p[:min(target_shape[0], save_shape[0])], borrow=False)
        else:
            # More biases needed: tile the saved ones over a fresh init.
            temp = np.float32(np.random.uniform(1e-5 + (0.5 if layer.activation_func in ["sigmoid", "relu"] else 0),
                                                1e-6, target_shape))
            for i in range(0, target_shape[0], save_shape[0]):
                temp[i:min(target_shape[0], save_shape[0] + i)] = p[:min(target_shape[0] - i, save_shape[0])]
            layer.params[1].set_value(temp, borrow=False)
    f.close()
    print "loading complete"
    #function lacks error-handling
def _loadParametersStrict(self, myfile="CNN.save"):
    """
    Load a parameter set which is **fully compatible** to
    the current network configuration (FAILS otherwise).

    Parameters
    ----------
    myfile: string
        Path to a file written by ``saveParameters``.
    """
    print("Loading from %s" % myfile)
    # Pickle protocol 2 is binary --> open in 'rb'
    # (the previous text mode 'r' breaks on Windows).
    f = open(myfile, 'rb')
    try:
        shp = cPickle.load(f)
        print("Shapes are: %s" % (shp,))
        print("#Layers = %i" % len(shp))
        for layer in self.layers:
            layer.params[0].set_value(cPickle.load(f), borrow=False)
            layer.params[1].set_value(cPickle.load(f), borrow=False)
            if len(layer.params) == 4:  # recurrent layers carry two extra tensors
                layer.params[2].set_value(cPickle.load(f), borrow=False)
                layer.params[3].set_value(cPickle.load(f), borrow=False)
    finally:
        # Close the handle even if a load/set fails (previously leaked on error).
        f.close()
def gradstats(self, *args, **kwargs):
    """Print shape, mean and std of every gradient produced by the
    compiled debug-gradients function for the given inputs."""
    grad_list = self.debug_gradients_function(*args, **kwargs)
    print("Gradient statistics")
    for grad in grad_list:
        stats = (grad.shape, np.mean(grad), np.std(grad))
        print("shape=%s,\tmean=%f,\tstd=%f" % stats)
def actstats(self, *args, **kwargs):
    """Print shape, mean and std of every layer activation for the given
    inputs (keyword arguments are accepted but not forwarded)."""
    act_list = self.get_activities(*args)
    print("Activation statistics")
    for act in act_list:
        stats = (act.shape, np.mean(act), np.std(act))
        print("shape=%s,\tmean=%f,\tstd=%f" % stats)
def paramstats(self, *args, **kwargs):
    """Print shape, mean and std of every trainable parameter
    (positional/keyword arguments are accepted but ignored)."""
    print("Parameters statistics")
    for param in self.params:
        values = param.get_value()
        stats = (values.shape, np.mean(values), np.std(values))
        print("shape=%s,\tmean=%f,\tstd=%f" % stats)