Source code for purestochastic.model.deep_ensemble

import tensorflow as tf
from tensorflow import keras
import numpy as np
from purestochastic.model.layers import *
from purestochastic.model.activations import *
from purestochastic.model.base_uncertainty_models import *
from keras.layers import InputLayer, Dense, Input
from purestochastic.common.metrics import *

class DeepEnsembleModel(StochasticModel):
    """ Implementation of the DeepEnsemble model.

    The Deep Ensemble [1]_ is an ensemble of Deep Learning models trained independently and
    combined for prediction in order to estimate uncertainty.

    The model can be constructed manually or it's possible to use the method ``toDeepEnsemble``
    to convert a simple :class:`keras.Model` object into a :class:`DeepEnsembleModel` object.
    This class doesn't need a specific loss function : the loss given at compile time
    (``self.compiled_loss``) is applied to the predictions of each model of the ensemble.

    Methods
    -------
    compute_loss(x=None, y=None, y_pred=None, sample_weight=None):
        Compute the loss independently for each model.
    _combine_predictions(predictions, stacked):
        Combine the predictions made by the models.
    compute_metrics(x, y, predictions, sample_weight):
        Specify the mean and stochastic part of the predictions to compute the metrics.
    predict(x):
        Compute the predictions of the model thanks to the `_combine_predictions` method.

    References
    ----------
    .. [1] Balaji Lakshminarayanan, Alexander Pritzel et Charles Blundell. « Simple and scalable
        predictive uncertainty estimation using deep ensembles ». In : Advances in Neural Information
        Processing Systems 2017-Decem.Nips (2017), p. 6403-6414. issn : 10495258. arXiv : 1612.01474.
    """

    def compute_loss(self, x=None, y=None, y_pred=None, sample_weight=None):
        """ Custom ``compute_loss`` function.

        This method overrides the ``compute_loss`` function so that the class doesn't
        need a specific loss function. The compiled loss is evaluated for each model
        independently and the results are averaged over the models.

        Arguments
        ---------
        x : tf.Tensor
            Input data (not used by this implementation).
        y : tf.Tensor
            Target data.
        y_pred : tf.Tensor
            Predictions returned by the model (output of ``model(x)``). The axis 1
            is the axis of the models.
        sample_weight : optional
           Sample weights for weighting the loss function.

        Returns
        -------
        The total loss (mean of the losses of the models).
        """

        # Define a function that computes the loss for one model
        def compute_loss_single_model(ytilde):
            return self.compiled_loss(y, ytilde, sample_weight, regularization_losses=self.losses)

        # Swap the two first axes so that the axis of the models comes first :
        # ``tf.vectorized_map`` iterates over the first axis of its input.
        rank = len(y_pred.shape)
        perm = (1, 0) + tuple(range(2, rank))
        predictions_per_model = tf.transpose(y_pred, perm)

        # Parallelization of the computation
        losses_per_model = tf.vectorized_map(compute_loss_single_model, predictions_per_model)
        return tf.reduce_mean(losses_per_model)

    def _combine_predictions(self, predictions, stacked):
        r""" Combine the predictions of all the models in order to quantify the uncertainty.

        The computation of uncertainty and the mean prediction is different according to the
        structure of the network. For the moment, there are 2 possibilities (B = number of models):

            * Mean Variance Activation (see method ``MeanVarianceActivation``):

                * Mean : :math:`\hat{\mu} = \dfrac{1}{B} \sum_{i=1}^{B} \hat{\mu}_i`
                * Epistemic Variance : :math:`\hat{\sigma}^2_{epi} = \dfrac{1}{B} \sum_{i=1}^{B} (\hat{\mu}_i - \hat{\mu})^2`
                * Aleatoric Variance : :math:`\hat{\sigma}^2_{alea} = \dfrac{1}{B} \sum_{i=1}^{B} (\sigma^2_i)`
            * No specific structure :

                * Mean : :math:`\hat{y} = \dfrac{1}{B} \sum_{i=1}^{B} \hat{y}_i`
                * Variance : :math:`\hat{\sigma}^2 = \dfrac{1}{B} \sum_{i=1}^{B} (\hat{y}_i - \hat{y})^2`

        In the future, it will be possible to add other possibilities.

        Arguments
        ---------
        predictions : tf.Tensor
            Predictions returned by the model (output of ``model(x)``). The axis 1
            is the axis of the models.
        stacked : boolean
            Boolean to indicate whether the output should be stacked in a single tensor or not.

        Returns
        -------
        Predictions that have been combined. If ``stacked`` is True, the output is a single
        numpy array whose last axis contains (mean, variance), or (mean, epistemic variance,
        aleatoric variance) in the Mean Variance Activation case. Otherwise, the output is the
        tuple (mean, variance) where the variance is the sum of the epistemic and the
        aleatoric variances in the Mean Variance Activation case.
        """

        # ``.get`` avoids a KeyError when the last layer has no ``activation`` entry
        # in its config : the standard structure is used in that case.
        last_activation = self.layers[-1].get_config().get('activation')

        # Case 1 : The Deep Ensemble outputs a variance and a mean for each model
        if last_activation == 'MeanVarianceActivation':

            # The last axis contains the mean (index 0) and the variance (index 1) of each model
            means = predictions[:, :, :, 0]
            variances = predictions[:, :, :, 1]

            # Compute the mean across the models
            mean_prediction = tf.reduce_mean(means, axis=1)

            # Compute the variance across the models. ``reduce_variance`` computes the mean
            # of the squared deviations, which can't be negative contrary to E[x^2] - E[x]^2
            # that suffers from floating point cancellation.
            mean_variance_epistemic = tf.math.reduce_variance(means, axis=1)
            mean_variance_aleatoric = tf.reduce_mean(variances, axis=1)

            if not stacked:
                return mean_prediction, mean_variance_epistemic + mean_variance_aleatoric

            return tf.stack(
                (mean_prediction, mean_variance_epistemic, mean_variance_aleatoric), axis=-1
            ).numpy()

        # Case 2 : The Deep Ensemble has a standard structure

        # Compute the mean and the variance across the models
        mean_prediction = tf.reduce_mean(predictions, axis=1)
        mean_variance = tf.math.reduce_variance(predictions, axis=1)

        if not stacked:
            return mean_prediction, mean_variance

        return tf.stack((mean_prediction, mean_variance), axis=-1).numpy()

    def compute_metrics(self, x, y, predictions, sample_weight):
        """ Custom ``compute_metrics`` method.

        This method combines the predictions of the models with ``_combine_predictions``
        and calls the parent ``compute_metrics`` method with the resulting ``y_pred``
        and ``stochastic_predictions`` arguments.

        Arguments
        ---------
        x : tf.Tensor
            Input data.
        y : tf.Tensor
            Target data.
        predictions : tf.Tensor
            Predictions returned by the model (output of ``model(x)``)
        sample_weight : optional
           Sample weights for weighting the loss function.

        Returns
        -------
        See parent method.
        """

        # Mean prediction and variance, kept as two separate tensors
        y_pred, stochastic_predictions = self._combine_predictions(predictions, stacked=False)

        return super().compute_metrics(x, y, y_pred, stochastic_predictions, sample_weight)

    def predict(self, x, **kwargs):
        """Combine predictions made by all the models.

        This method just calls the parent's method and then combines the predictions
        in order to quantify uncertainty.

        Arguments
        ----------
        x : tf.Tensor
            Input data.
        kwargs : optional
            Other Arguments of the `predict` parent's method.

        Returns
        -------
        np.ndarray
            Predictions made by the Deep Ensemble model (stacked output of
            ``_combine_predictions``).
        """

        # Compute the predictions of each model
        predictions = super().predict(x, **kwargs)

        return self._combine_predictions(predictions, stacked=True)

def toDeepEnsemble(net, nb_models):
    """Convert a regular model into a deep ensemble model.

    This method intends to be high-level interface to construct
    a Deep Ensemble model from a regular model. At present, only
    the densely-connected NN is compatible with a fully parallelizable
    implementation. Other architecture are just concatenated models.

    The original model ``net`` is not modified : the layers of the ensemble
    are new layers created from the configuration of the layers of ``net``.

    Parameters
    ----------
    net : tf.keras.Sequential or tf.keras.Model
        a tensorflow model

    nb_models : int
        the number of models

    Return
    ------
    :class:`DeepEnsembleModel`
        a Deep Ensemble Model when the network is compatible with the
        fully parallelizable implementation.

    Raises
    ------
    ValueError
        If ``net`` has not been built.

    TODO
    ----
    Add support for other architectures
    """

    # If the model is not built, raises a ValueError to
    # ask to build the model
    if not net.built:
        raise ValueError(
          'This model has not yet been built. '
          'Build the model first by calling `build()` or by calling '
          'the model on a batch of data.')

    # Check whether the net is compatible with the fully parallelizable implementation
    is_compatible = all(
        isinstance(layer, (Dense, Dense2Dto3D, InputLayer)) for layer in net.layers
    )

    if is_compatible:
        print('Your network is compatible with a fully parallelizable implementation.')

        # Work on a local copy of the list of layers so that ``net`` is never mutated.
        source_layers = list(net.layers)

        # The net can come from a tf.keras.Sequential or tf.keras.Model.
        # The DeepEnsembleModel is a subclass of tf.keras.Model so it needs
        # an Input layer. That's why, if the original net is from tf.keras.Sequential
        # and doesn't contain an Input layer, we have to add it from the shape given
        # in the first layer.
        first_layer = source_layers[0]
        config = first_layer.get_config()
        if isinstance(first_layer, InputLayer):
            inputs = Input(**config)
            # The input layer has been recreated, it must not be converted below
            source_layers = source_layers[1:]
        else:
            # The first layer only carries ``batch_input_shape`` when it has been created
            # with an input shape. Otherwise, use the input shape of the built model.
            batch_input_shape = config.get("batch_input_shape")
            if batch_input_shape is None:
                batch_input_shape = net.input_shape
            inputs = Input(shape=batch_input_shape[1:], name='input')
        x = inputs

        # Iterate over all layers to convert them to the right type
        for layer in source_layers:
            config = layer.get_config()
            config['name'] = 'ensemble_' + config['name']
            # Convert Dense layer to Dense2Dto3D or Dense3Dto3D according to the input shape.
            # NOTE(review): a Dense layer whose input is neither 2D nor 3D is silently
            # skipped, as in the original code -- confirm whether it should be an error.
            if isinstance(layer, Dense):
                if len(x.shape) == 2:
                    config['units_dim1'] = nb_models
                    config['units_dim2'] = config.pop('units')
                    x = Dense2Dto3D(**config)(x)
                elif len(x.shape) == 3:
                    x = Dense3Dto3D(**config)(x)
            # Convert Dense2Dto3D to Dense3Dto4D.
            elif isinstance(layer, Dense2Dto3D):
                x = Dense3Dto4D(**config)(x)

        # Return and construct an instance of the DeepEnsembleModel
        return DeepEnsembleModel(inputs=inputs, outputs=x)

    else:
        # NOTE(review): only a message is printed in this branch as far as visible here,
        # no model is built -- confirm against the rest of the module.
        print('Your network is not compatible with a fully parallelizable implementation. The '
              'Deep Ensemble will just be a concatenation of the same model multiple times.')