Source code for purestochastic.model.deep_ensemble

import tensorflow as tf
from tensorflow import keras
import numpy as np
from purestochastic.model.layers import *
from purestochastic.model.activations import *
from purestochastic.model.base_uncertainty_models import *
from keras.layers import InputLayer, Dense, Input
from purestochastic.common.metrics import *

class DeepEnsembleModel(StochasticModel):
    """Implementation of the Deep Ensemble model.

    The Deep Ensemble [1]_ is an ensemble of Deep Learning models trained
    independently and combined for prediction in order to estimate
    uncertainty.

    The model can be constructed manually, or the function
    ``toDeepEnsemble`` can be used to convert a simple :class:`keras.Model`
    object into a :class:`DeepEnsembleModel` object.

    This class doesn't need a specific loss function: ``compute_loss``
    applies the compiled loss to each model independently, so it can be
    compiled with TensorFlow loss functions as well as custom loss
    functions.

    Methods
    -------
    compute_loss(x=None, y=None, y_pred=None, sample_weight=None):
        Compute the loss independently for each model.
    _combine_predictions(predictions, stacked):
        Combine the predictions made by the models.
    compute_metrics(x, y, predictions, sample_weight):
        Specify the mean and stochastic part of the predictions to compute
        the metrics.
    predict(x):
        Compute the predictions of the model thanks to the
        ``_combine_predictions`` method.

    References
    ----------
    .. [1] Balaji Lakshminarayanan, Alexander Pritzel and Charles Blundell.
       "Simple and scalable predictive uncertainty estimation using deep
       ensembles". In: Advances in Neural Information Processing Systems
       (2017), p. 6403-6414. ISSN: 10495258. arXiv: 1612.01474.
    """

    def compute_loss(self, x=None, y=None, y_pred=None, sample_weight=None):
        """Custom ``compute_loss`` function.

        This method overrides the ``compute_loss`` function so that the
        class doesn't need a specific loss function. It computes the loss
        for each model independently and averages the results.

        Arguments
        ---------
        x : tf.Tensor
            Input data.
        y : tf.Tensor
            Target data.
        y_pred : tf.Tensor
            Predictions returned by the model (output of ``model(x)``).
            Axis 1 is treated as the model axis.
        sample_weight : optional
            Sample weights for weighting the loss function.

        Returns
        -------
        The total loss (mean of the per-model losses).
        """
        # Loss of a single ensemble member against the shared targets.
        def compute_loss_single_model(ytilde):
            return self.compiled_loss(
                y, ytilde, sample_weight, regularization_losses=self.losses)

        # Swap axes 0 and 1 so that the model axis comes first (it is the
        # axis ``tf.vectorized_map`` iterates over); other axes keep their
        # order.
        perm = (1, 0) + tuple(range(2, len(y_pred.shape)))
        per_model_losses = tf.vectorized_map(
            compute_loss_single_model, tf.transpose(y_pred, perm))
        return tf.reduce_mean(per_model_losses)

    def _combine_predictions(self, predictions, stacked):
        r"""Combine the predictions of all the models to quantify uncertainty.

        The computation of the uncertainty and of the mean prediction
        depends on the structure of the network. For the moment, there are
        2 possibilities (B = number of models):

        * Mean Variance Activation (see ``MeanVarianceActivation``):

            * Mean : :math:`\hat{\mu} = \dfrac{1}{B} \sum_{i=1}^{B} \hat{\mu}_i`
            * Epistemic Variance : :math:`\hat{\sigma}^2_{epi} = \dfrac{1}{B} \sum_{i=1}^{B} (\hat{\mu}_i - \hat{\mu})^2`
            * Aleatoric Variance : :math:`\hat{\sigma}^2_{alea} = \dfrac{1}{B} \sum_{i=1}^{B} (\sigma^2_i)`

        * No specific structure :

            * Mean : :math:`\hat{y} = \dfrac{1}{B} \sum_{i=1}^{B} \hat{y}_i`
            * Variance : :math:`\hat{\sigma}^2 = \dfrac{1}{B} \sum_{i=1}^{B} (\hat{y}_i - \hat{y})^2`

        In the future, it will be possible to add other possibilities.

        Arguments
        ---------
        predictions : tf.Tensor
            Predictions returned by the model (output of ``model(x)``).
            Axis 1 is the model axis. In the mean-variance case the tensor
            is indexed with four axes, the last one holding the mean
            (index 0) and the variance (index 1).
        stacked : boolean
            Boolean to indicate whether the output should be stacked in a
            single tensor or not.

        Returns
        -------
        Predictions that have been combined. If ``stacked`` is True, the
        output is a single numpy array whose last axis holds
        (mean, epistemic variance, aleatoric variance) in the mean-variance
        case and (mean, variance) otherwise. If ``stacked`` is False, the
        output is the tuple ``(mean, total variance)`` of tensors.
        """
        # ``.get`` avoids a KeyError when the last layer exposes no
        # 'activation' entry in its config; such layers use the standard case.
        last_activation = self.layers[-1].get_config().get('activation')

        # Case 1 : The Deep Ensemble outputs a mean and a variance per model
        if last_activation == 'MeanVarianceActivation':
            member_means = predictions[:, :, :, 0]
            member_variances = predictions[:, :, :, 1]

            # Mean across the models
            mean_prediction = tf.reduce_mean(member_means, axis=1)

            # Variance across the models. ``reduce_variance`` averages the
            # squared deviations, which cannot go negative through rounding
            # (unlike E[x^2] - E[x]^2).
            mean_variance_epistemic = tf.math.reduce_variance(
                member_means, axis=1)
            mean_variance_aleatoric = tf.reduce_mean(member_variances, axis=1)

            if stacked:
                return tf.stack(
                    (mean_prediction,
                     mean_variance_epistemic,
                     mean_variance_aleatoric),
                    axis=-1).numpy()
            return (mean_prediction,
                    mean_variance_epistemic + mean_variance_aleatoric)

        # Case 2 : The Deep Ensemble has a standard structure
        mean_prediction = tf.reduce_mean(predictions, axis=1)
        mean_variance = tf.math.reduce_variance(predictions, axis=1)

        if stacked:
            return tf.stack((mean_prediction, mean_variance), axis=-1).numpy()
        return mean_prediction, mean_variance

    def compute_metrics(self, x, y, predictions, sample_weight):
        """Custom ``compute_metrics`` method.

        This method calls the parent ``compute_metrics`` with the
        appropriate ``y_pred`` and ``stochastic_predictions`` arguments,
        obtained by combining the predictions of all the models.

        Arguments
        ---------
        x : tf.Tensor
            Input data.
        y : tf.Tensor
            Target data.
        predictions : tf.Tensor
            Predictions returned by the model (output of ``model(x)``).
        sample_weight : optional
            Sample weights for weighting the loss function.

        Returns
        -------
        See parent method.
        """
        y_pred, stochastic_predictions = self._combine_predictions(
            predictions, stacked=False)
        return super(DeepEnsembleModel, self).compute_metrics(
            x, y, y_pred, stochastic_predictions, sample_weight)

    def predict(self, x, **kwargs):
        """Combine predictions made by all the models.

        This method calls the parent's ``predict`` method and then combines
        the predictions in order to quantify uncertainty.

        Arguments
        ---------
        x : tf.Tensor
            Input data.
        kwargs : optional
            Other arguments of the parent's ``predict`` method.

        Returns
        -------
        np.ndarray
            Predictions made by the Deep Ensemble model.
        """
        # Raw per-model predictions from the parent implementation
        predictions = super(DeepEnsembleModel, self).predict(x, **kwargs)
        return self._combine_predictions(predictions, stacked=True)
def toDeepEnsemble(net, nb_models):
    """Convert a regular model into a deep ensemble model.

    This function intends to be a high-level interface to construct a Deep
    Ensemble model from a regular model.

    At present, only the densely-connected NN is compatible with a fully
    parallelizable implementation. Other architectures are just
    concatenated models.

    Parameters
    ----------
    net : tf.keras.Sequential or tf.keras.Model
        a tensorflow model (must already be built)
    nb_models : int
        the number of models

    Returns
    -------
    :class:`DeepEnsembleModel`
        a Deep Ensemble Model

    Raises
    ------
    ValueError
        If ``net`` has not been built yet.

    TODO
    ----
    Add support for other architectures
    """
    # If the model is not built, raise a ValueError to ask to build it
    if not net.built:
        raise ValueError(
            'This model has not yet been built. '
            'Build the model first by calling `build()` or by calling '
            'the model on a batch of data.')

    # Check whether the net is compatible with the fully parallelizable
    # implementation
    is_compatible = all(
        isinstance(layer, (Dense, Dense2Dto3D, InputLayer))
        for layer in net.layers)

    if is_compatible:
        print('Your network is compatible with a fully parallelizable implementation.')

        # Work on a local copy of the layer list so the caller's model is
        # never modified.
        layers = list(net.layers)

        # The net can come from a tf.keras.Sequential or tf.keras.Model.
        # The DeepEnsembleModel is a subclass of tf.keras.Model so it needs
        # an Input layer. That's why, if the original net doesn't start with
        # an Input layer, one is created from the input shape.
        first_layer = layers[0]
        config = first_layer.get_config()
        if isinstance(first_layer, InputLayer):
            inputs = Input(**config)
            layers = layers[1:]
        else:
            # The first layer's config only carries 'batch_input_shape' when
            # the shape was given at construction; otherwise fall back to
            # the shape recorded on the built model.
            batch_input_shape = config.get("batch_input_shape")
            if batch_input_shape is None:
                batch_input_shape = net.input_shape
            inputs = Input(shape=batch_input_shape[1:], name='input')

        x = inputs

        # Iterate over all layers to convert them to the right type
        for layer in layers:
            config = layer.get_config()
            config['name'] = 'ensemble_' + config['name']

            # Convert Dense layer to Dense2Dto3D or Dense3Dto3D according to
            # the rank of the current tensor.
            if isinstance(layer, Dense):
                if len(x.shape) == 2:
                    config['units_dim1'] = nb_models
                    config['units_dim2'] = config.pop('units')
                    x = Dense2Dto3D(**config)(x)
                elif len(x.shape) == 3:
                    x = Dense3Dto3D(**config)(x)

            # Convert Dense2Dto3D to Dense3Dto4D.
            elif isinstance(layer, Dense2Dto3D):
                x = Dense3Dto4D(**config)(x)

        # Construct and return an instance of the DeepEnsembleModel
        return DeepEnsembleModel(inputs=inputs, outputs=x)

    else:
        # NOTE(review): in the code visible here this branch only prints the
        # message; confirm where the concatenated ensemble is built.
        print('Your network is not compatible with a fully parallelizable implementation. The '
              'Deep Ensemble will just be a concatenation of the same model multiple times.')