# Source code for purestochastic.model.swag

import keras
import tensorflow as tf
import numpy as np
from purestochastic.model.base_uncertainty_models import StochasticModel
from purestochastic.model.deep_ensemble import toDeepEnsemble


class SWAGCallback(keras.callbacks.Callback):
    r"""Approximation of the posterior distribution of parameters as a gaussian distribution.

    Callback used in the class :class:`SWAGModel` and :class:`MultiSWAGModel`.
    It allows to approximate the posterior distribution of the parameters as a
    gaussian distribution. The parameters of the gaussian distribution are
    computed as follows :

    * The mean of the gaussian is the mean of the parameters (first moment)
      found during the training process. Mathematically, it is defined as :

      .. math::
          \theta_{SWA} = \frac{1}{T} \sum_{t=1}^T \theta_t

    * The covariance matrix is constructed by taking half of a diagonal
      approximation and half of a low-rank approximation of the covariance
      matrix. The diagonal approximation is computed at the end of the
      training by using the first and second order moments of the parameters :

      .. math::
          \Sigma_{Diag} = diag(\bar{\theta}^2-\theta_{SWA}^2)

      The low-rank approximation is constructed by using the difference of
      the last K values of the parameters with the mean value of the
      parameters :

      .. math::
          \Sigma_{low-rank} = \frac{1}{K-1}.\hat{D}\hat{D}^T
          \text{ where each column of } \hat{D} \text{ is }
          D_t=(\theta_t - \bar{\theta}_t)

    To sample from this gaussian distribution, the SWAGModel and
    MultiSWAGModel use the following equation :

    .. math::
        \theta_j = \theta_{SWA} +\frac{1}{\sqrt{2}}.\Sigma_{diag}^{\frac{1}{2}}n_1
        + \frac{1}{\sqrt{2(K-1)}}\hat{D}n_2, ~~ n_1, n_2 \sim \mathcal{N}(0,I)

    It is then sufficient to store the matrix D, the first order moments of
    the parameters as well as the diagonal approximation of the covariance at
    the end of the training.

    Parameters
    -----------
    learning_rate : float
        The learning rate of the optimizer. It is only stored on the callback;
        the callback itself never reads it.
    update_frequency: int
        The number of epochs between two updates of the first and second
        moments of the parameters.
    K: int
        The maximum number of columns kept in the deviation matrix, i.e. the
        rank of the low-rank part of the covariance.
    """

    def __init__(self, learning_rate, update_frequency, K):
        super().__init__()
        self.learning_rate = learning_rate
        self.update_frequency = update_frequency
        self.K = K

        # Running statistics, one flattened array per weight tensor of the
        # model. They stay ``None`` until the first snapshot is taken.
        self.order1_moments = None
        self.order2_moments = None
        self.deviation_matrix = None
        # Number of weight snapshots already folded into the running moments.
        self._n_snapshots = 0

    def _initialize_moments(self):
        """Start the running statistics from the current weights of the model."""
        self.order1_moments = []
        self.order2_moments = []
        self.deviation_matrix = []
        for array in self.model.get_weights():
            array = array.reshape(-1)
            self.order1_moments.append(array)
            self.order2_moments.append(np.power(array, 2))
            # The first deviation is taken against a mean equal to the
            # snapshot itself, hence a single column of zeros.
            self.deviation_matrix.append((array - array).reshape(-1, 1))
        self._n_snapshots = 1

    def on_epoch_end(self, epoch, logs=None):
        r"""Updates first and second order moments as well as deviation matrix.

        Every ``update_frequency`` epochs, the first and second order moments
        as well as deviation matrix are updated :

        .. math::
            \bar{\theta} = \frac{n \bar{\theta} + \theta_{epochs}}{n+1}

        .. math::
            \bar{\theta}^2 = \frac{n \bar{\theta}^2 + \theta_{epochs}^2}{n+1}

        .. math::
            \text{APPEND_COL}(\hat{D}, \theta_{epochs}-\bar{\theta})

        If the matrix D already has K columns, the oldest column is removed
        before the new one is appended.

        The statistics are (re)initialised at epoch 0, and also on the first
        call when training does not start at epoch 0, so that the callback
        no longer fails with an ``AttributeError`` in that case.

        Parameters
        ----------
        epoch : int
            The number of the actual epoch.
        logs : optional
            Unused by this method.
        """
        if epoch == 0 or self.order1_moments is None:
            self._initialize_moments()
            return

        if epoch % self.update_frequency != 0:
            return

        # ``n`` is the number of snapshots already averaged. Counting them
        # explicitly gives the same value as ``epoch / update_frequency``
        # for a run that starts at epoch 0 and stays correct otherwise.
        n = self._n_snapshots
        for i, array in enumerate(self.model.get_weights()):
            array = array.reshape(-1)

            # Update the moments
            self.order1_moments[i] = (n * self.order1_moments[i] + array) / (n + 1)
            self.order2_moments[i] = (n * self.order2_moments[i] + np.power(array, 2)) / (n + 1)

            # Update the deviation matrix, dropping the oldest column when full
            deviations = self.deviation_matrix[i]
            if deviations.shape[1] >= self.K:
                deviations = np.delete(deviations, 0, axis=1)
            new_column = (array - self.order1_moments[i]).reshape(-1, 1)
            self.deviation_matrix[i] = np.hstack((deviations, new_column))
        self._n_snapshots = n + 1

    def on_train_end(self, logs=None):
        """Compute and store the variables needed to sample the posterior distribution.

        The mean of the gaussian distribution is saved in the attribute
        ``SWA_weights`` of the model. The deviation matrix used in the
        covariance matrix is saved in the attribute ``deviation_matrix`` of
        the model. Finally, the root of the diagonal matrix used in the
        covariance matrix is computed and saved in the attribute ``SWA_cov``
        of the model.

        If the training stopped before any snapshot was taken (for instance
        because it ran for zero epochs), the current weights are used as the
        single snapshot, which gives a zero diagonal covariance and a single
        all-zero deviation column.

        Parameters
        ----------
        logs : optional
            See tf.keras.callbacks.Callback
        """
        if self.order1_moments is None:
            self._initialize_moments()

        # Save the SWA weights
        self.model.SWA_weights = list(self.order1_moments)

        # Compute the element needed for the covariance matrix. The difference
        # is clipped at 0 before the square root because rounding can make it
        # slightly negative.
        self.model.deviation_matrix = self.deviation_matrix
        self.model.SWA_cov = [
            np.sqrt(np.maximum(second - np.power(first, 2), 0))
            for first, second in zip(self.model.SWA_weights, self.order2_moments)
        ]
class SWAGModel(StochasticModel):
    """ Implementation of the SWAG Model.

    The SWAG [2]_ (Stochastic Weight Averaging Gaussian) is a model to make
    bayesian inference and training to quantify uncertainty. For more details,
    see :class:`SWAGCallback`.

    The model can be constructed manually or it's possible to use the method
    `toSWAG` to convert a simple :class:`keras.Model` object into a
    :class:`SWAGModel` object.

    Methods
    -------
    fit(X, y, start_averaging=10, learning_rate=0.001, update_frequency=1, K=10):
        Trains the model with the SWAG algorithm.
    _sample_prediction(data, S, verbose=0):
        Sample different prediction according to the posterior distribution of the parameters.
    _combine_predictions(predictions, stacked):
        Combine the sampled predictions.
    compute_metrics(x, y, predictions, sample_weight):
        Specify the mean and stochastic part of the predictions to compute the metrics.
    predict(data, S=5, verbose=0):
        Computes the predictions of the model with the SWAG algorithm.
    evaluate(x=None, y=None, S=5, sample_weight=None):
        Evaluate the model with the SWAG algorithm.

    References
    ----------
    .. [2] Wesley J. Maddox et al. « A simple baseline for Bayesian uncertainty
       in deep learning ». In : Advances in Neural Information Processing
       Systems 32.NeurIPS (2019), p. 1-25. issn : 10495258. arXiv : 1902.02476.
    """

    def fit(self, X, y, start_averaging=10, learning_rate=0.001, update_frequency=1, K=10, **kwargs):
        """Train the model with the SWAG algorithm.

        The model is trained in two parts :

        * Before ``start_averaging`` epochs, the model is trained normally.
          It's defined as the pretraining of the model and the training uses
          the optimizer and learning rate specified in the ``compile``
          function.

        * After ``start_averaging`` epochs, the model is trained with the SWAG
          callback. In other words, at the end of specific epochs (according
          to parameters), the parameters of the model are saved. At the end of
          the training, the callback computes the parameters of the
          approximated posterior gaussian distribution. The parameters are
          then used in ``_sample_prediction`` in order to sample different
          predictions. At present, the optimizer is necessarily the SGD
          optimizer.

        See Also
        ---------
        SWAGCallback

        Parameters
        ----------
        X: np.ndarray
            The input data.
        y: np.ndarray
            The target data.
        start_averaging: int
            The number of epochs to pretrain the model.
        learning_rate: float
            The learning rate of the SWAG algorithm (second part).
        update_frequency: int
            The number of epochs between each save of parameters of the SWAG
            algorithm.
        K: int
            The number of samples used to compute the covariance matrix.
        **kwargs
            Forwarded to the parent ``fit`` for both parts. ``epochs`` (the
            total number of epochs, pretraining included) is required.
            ``callbacks``, when given, is expected to be a list of callbacks;
            it is used for both parts, and the SWAG callback is appended to
            it for the second part.

        Returns
        -------
        History of the SWAG's training (second part only).

        Raises
        ------
        KeyError
            If ``epochs`` is not given.
        """
        # Store the number of epochs given in kwargs and remove it from kwargs
        epochs = kwargs.pop('epochs')

        # User callbacks are taken out of kwargs: passing them through
        # ``**kwargs`` next to an explicit ``callbacks=`` argument raised a
        # TypeError in the second part.
        user_callbacks = kwargs.pop('callbacks', None)
        pretraining_kwargs = dict(kwargs)
        if user_callbacks is not None:
            pretraining_kwargs['callbacks'] = user_callbacks

        # Make the pretraining of the model with the specified optimizer
        if kwargs.get("verbose") == 1:
            print("############ Pretraining ############")
        super().fit(X, y, epochs=start_averaging, **pretraining_kwargs)

        # Make the epochs with the SWAG strategy
        if kwargs.get("verbose") == 1:
            print("############ SWAG algorithm ############")
        self.compile(loss=self.loss,
                     optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
                     metrics=self.compiled_metrics._metrics,
                     stochastic_metrics=self.stochastic_metrics)
        swag_callbacks = [] if user_callbacks is None else list(user_callbacks)
        swag_callbacks.append(SWAGCallback(learning_rate, update_frequency, K))
        return super().fit(X, y, epochs=epochs - start_averaging,
                           callbacks=swag_callbacks, **kwargs)

    def _draw_posterior_weights(self, rng, shapes):
        """Draw one set of weights from the approximated gaussian posterior.

        Parameters
        ----------
        rng : np.random.Generator
            Random generator used for the two standard normal draws.
        shapes : list of tuple
            Shape of each weight tensor of the model, in ``get_weights`` order.

        Returns
        -------
        list of np.ndarray
            The sampled weights, reshaped to ``shapes``.
        """
        sampled_weights = []
        for i, shape in enumerate(shapes):
            mean = self.SWA_weights[i]
            deviations = self.deviation_matrix[i]
            n_columns = deviations.shape[1]

            # Diagonal part of the covariance
            sample = mean + self.SWA_cov[i] * rng.standard_normal(mean.shape[0]) / np.sqrt(2)

            # Low-rank part of the covariance. With a single column the
            # normalisation 1/sqrt(2*(K-1)) is a division by zero (the only
            # column produced by the callback is then all zeros, giving NaN),
            # so the term is skipped.
            if n_columns > 1:
                sample = sample + (np.dot(deviations, rng.standard_normal(n_columns))
                                   / np.sqrt(2 * (n_columns - 1)))

            sampled_weights.append(sample.reshape(shape))
        return sampled_weights

    def _sample_prediction(self, data, S, verbose=0):
        """Sample predictions according to the posterior distribution of the parameters.

        In the SWAG algorithm, the posterior distribution of the parameters is
        approximated as a Gaussian Distribution. The mean and the covariance
        are specified in the report associated with the code or in the
        article [2]_. The mean has been stored in the variable
        ``SWA_weights``. The diagonal and the Kth-rank approximation of the
        covariance matrix have been stored respectively in ``SWA_cov`` and
        ``deviation_matrix``.

        The method samples the weights and computes the prediction associated
        multiple times. The weights the model had before the call are put
        back once the sampling is over, even if a prediction fails.

        Parameters
        ----------
        data : tf.Tensor
            Input data (equivalent to x).
        S: int
            The number of samples used in the Monte Carlo method.
        verbose: int, default:0
            The verbosity level.

        Return
        ------
        preds : np.ndarray
            The batch of S predictions (float32), with the samples on the
            first axis.

        Raises
        ------
        ValueError
            If ``S`` is smaller than 1.
        """
        if S < 1:
            raise ValueError("S must be a positive integer, got {}.".format(S))

        final_weights = self.get_weights()
        shapes = [array.shape for array in final_weights]

        # Set up random generator
        rng = np.random.default_rng()

        preds = None
        try:
            # Bayesian Model Averaging
            for s in range(S):
                # Draw the value of the parameters and set them for the iteration
                self.set_weights(self._draw_posterior_weights(rng, shapes))

                # Compute the predictions
                pred_iter = super().predict(data, verbose=verbose)

                # Initialize the predictions
                if preds is None:
                    preds = np.zeros((S,) + pred_iter.shape, dtype=np.float32)

                # Store the predictions
                preds[s] = pred_iter
        finally:
            # Do not leave the model on the last random sample
            self.set_weights(final_weights)

        return preds

    def _outputs_mean_variance(self):
        """Tell whether the last layer is configured with ``MeanVarianceActivation``."""
        # ``.get`` so that a last layer without an ``activation`` entry in its
        # config is treated as a standard output instead of raising KeyError.
        return self.layers[-1].get_config().get('activation') == 'MeanVarianceActivation'

    def _combine_predictions(self, predictions, stacked):
        r""" Bayesian Model Averaging of the S predictions.

        This method follows the ``_sample_prediction`` method. It takes in
        input the batch of S predictions sampled from ``_sample_prediction``
        method. Then, it averages the predictions in order to compute the mean
        and the uncertainty associated with the prediction.

        The computation of uncertainty and the mean prediction is different
        according to the structure of the network. For the moment, there are
        2 possibilities (S=number of samples):

        * Mean Variance Activation (see method ``MeanVarianceActivation``):

          * Mean : :math:`\hat{\mu} = \dfrac{1}{S} \sum_{i=1}^{S} \hat{\mu}_i`
          * Epistemic Variance : :math:`\hat{\sigma}^2_{epi} = \dfrac{1}{S} \sum_{i=1}^{S} (\hat{\mu}_i - \hat{\mu})^2`
          * Aleatoric Variance : :math:`\hat{\sigma}^2_{alea} = \dfrac{1}{S} \sum_{i=1}^{S} (\sigma^2_i)`

        * No specific structure

          * Mean : :math:`\hat{y} = \dfrac{1}{S} \sum_{i=1}^{S} \hat{y}_i`
          * Variance : :math:`\hat{\sigma}^2 = \dfrac{1}{S} \sum_{i=1}^{S} (\hat{y}_i - \hat{y})^2`

        In the future, it would be possible to add other possibilities.

        Parameters
        ----------
        predictions : tf.Tensor
            Batch of the S predictions computed by ``_sample_prediction``.
        stacked : boolean
            Boolean to indicate whether the output should be stacked in a
            single tensor or not.

        Returns
        -------
        If ``stacked`` is False, the tuple ``(mean, total variance)``.
        Otherwise a single numpy array whose last axis holds the mean
        followed by the variance(s) (epistemic then aleatoric in the
        mean-variance case).
        """
        # Case 1 : The model outputs a variance and a mean
        if self._outputs_mean_variance():
            mean_samples = predictions[:, :, :, 0]
            mean_prediction = tf.reduce_mean(mean_samples, axis=0)
            # Variance of the sampled means. Computed directly rather than as
            # E[x^2] - E[x]^2, which can come out slightly negative.
            mean_variance_epistemic = tf.math.reduce_variance(mean_samples, axis=0)
            mean_variance_aleatoric = tf.reduce_mean(predictions[:, :, :, 1], axis=0)
            mean_variance = mean_variance_epistemic + mean_variance_aleatoric

            if not stacked:
                return mean_prediction, mean_variance
            return tf.stack((mean_prediction, mean_variance_epistemic, mean_variance_aleatoric),
                            axis=-1).numpy()

        # Case 2 : The model has a standard structure
        mean_prediction = tf.reduce_mean(predictions, axis=0)
        mean_variance = tf.math.reduce_variance(predictions, axis=0)

        if not stacked:
            return mean_prediction, mean_variance
        return tf.stack((mean_prediction, mean_variance), axis=-1).numpy()

    def compute_metrics(self, x, y, predictions, sample_weight):
        """ Custom ``compute_metrics`` method.

        As stated in the parent method ``compute_metrics``, this method calls
        the parent function with the appropriate ``y_pred`` and
        ``stochastic_predictions`` arguments.

        Warning
        -------
        Unless the model predicts aleatoric uncertainty, the model can't
        compute stochastic metrics before the end of the training.

        Arguments
        ---------
        x : tf.Tensor
            Input data.
        y : tf.Tensor
            Target data.
        predictions : tf.Tensor
            Predictions returned by the model (output of `model(x)`)
        sample_weight : optional
            Sample weights for weighting the loss function.

        Returns
        -------
        See parent method.
        """
        if self._outputs_mean_variance():
            y_pred = predictions[:, :, 0]
            variance = predictions[:, :, 1]
            return super().compute_metrics(x, y, y_pred, variance, sample_weight)

        print("Warning : Impossible to compute stochastic metrics before the end of the training.")
        # Deliberately starts the lookup after StochasticModel, so that its
        # own ``compute_metrics`` is skipped and only the four standard
        # arguments are passed on.
        return super(StochasticModel, self).compute_metrics(x, y, predictions, sample_weight)

    def predict(self, data, S=5, verbose=0):
        """ Sample predictions and combine them.

        This method defines the inference step of the SWAG algorithm. First,
        it samples predictions of the model with the ``_sample_prediction``
        method. Then, the predictions are combined with the method
        ``_combine_predictions``.

        Parameters
        ----------
        data: numpy.ndarray
            The input data.
        S: int, default:5
            The number of samples used in the Monte Carlo method.
        verbose: int, default:0
            The verbosity level.

        Returns
        -------
        The predictions of the model.
        """
        # Sample and compute the predictions
        predictions = self._sample_prediction(data, S, verbose=verbose)

        # Combine predictions
        return self._combine_predictions(predictions, stacked=True)

    def evaluate(self, x=None, y=None, S=5, sample_weight=None):
        """ Custom ``evaluate`` method.

        It returns the loss value & metrics values for the model in test mode.

        Parameters
        ----------
        x : tf.Tensor
            Input data.
        y : tf.Tensor
            Target data
        S : int, default:5
            The number of samples used in the Monte Carlo method.
        sample_weight : optional
            Sample weights for weighting the loss function.

        Return
        ------
        Dict containing the values of the metrics and loss of the model.
        """
        # Sample and compute the predictions
        predictions = self._sample_prediction(x, S, verbose=0)

        # Combine predictions
        y_pred, stochastic_predictions = self._combine_predictions(predictions, stacked=False)

        metrics = super().compute_metrics(x, tf.convert_to_tensor(y), y_pred,
                                          stochastic_predictions, sample_weight)
        return keras.utils.tf_utils.sync_to_numpy_or_python_type(metrics)
def toSWAG(net):
    """Convert a regular model into a SWAG model.

    This method intends to be a high-level interface to construct a SWAG model
    from a regular model. The new model is built from the configuration of
    ``net`` only (``SWAGModel.from_config(net.get_config())``).

    NOTE(review): since only the configuration is passed on, the weights of
    ``net`` are presumably not transferred to the returned model -- confirm.

    Parameters
    ----------
    net : :class:`tf.keras.Sequential` or :class:`tf.keras.Model`
        a tensorflow model

    Return
    ------
    :class:`SWAGModel`
        a SWAG Model
    """
    architecture = net.get_config()
    swag_model = SWAGModel.from_config(architecture)
    return swag_model
class MultiSWAGModel(StochasticModel):
    """ Implementation of the MultiSWAG Model.

    The MultiSWAG [3]_ (Multi Stochastic Weight Averaging Gaussian) is an
    ensemble of SWAG Model. It's a mix between a DeepEnsemble and SWAG Model.
    For more details, see :class:`SWAGCallback`, :class:`SWAGModel` and
    :class:`DeepEnsembleModel`.

    The model can be constructed manually or it's possible to use the method
    ``toMultiSWAG`` to convert a simple :class:`keras.Model` object into a
    :class:`MultiSWAGModel` object.

    This class does not need a specific loss function : ``compute_loss``
    applies the compiled loss to the predictions of each model independently.

    Methods
    -------
    fit(X, y, start_averaging=10, learning_rate=0.001, update_frequency=1, K=10):
        Trains the model with the MultiSWAG algorithm.
    _sample_prediction(data, S, verbose=0):
        Sample different prediction according to the posterior distribution of the parameters.
    _combine_predictions(predictions, sampled, stacked):
        Combine the sampled predictions made by all models.
    compute_metrics(x, y, prediction, sample_weight):
        Specify the mean and stochastic part of the predictions to compute the metrics.
    evaluate(x=None, y=None, S=5, sample_weight=None):
        Evaluate the model with the MultiSWAG algorithm.
    predict(data, S=5, verbose=0):
        Computes the predictions of the model with the MultiSWAG algorithm.

    References
    ----------
    .. [3] Andrew Gordon Wilson et Pavel Izmailov. « Bayesian deep learning and
       a probabilistic perspective of generalization ». In : Advances in
       Neural Information Processing Systems 2020- Decem.3 (2020).
       issn : 10495258. arXiv : 2002.08791.
    """

    def compute_loss(self, x=None, y=None, y_pred=None, sample_weight=None):
        """ Custom ``compute_loss`` function.

        This method overrides the ``compute_loss`` function so that the class
        doesn't need specific loss function. It computes the loss for each
        model independently. It's the same function as in
        :class:`DeepEnsembleModel`.

        Arguments
        ---------
        x : tf.Tensor
            Input data.
        y : tf.Tensor
            Target data.
        y_pred : tf.Tensor
            Predictions returned by the model (output of ``model(x)``)
        sample_weight : optional
            Sample weights for weighting the loss function.

        Returns
        -------
        The total loss (mean of the per-model losses).
        """
        def compute_loss_single_model(ytilde):
            return self.compiled_loss(y, ytilde, sample_weight, regularization_losses=self.losses)

        # Swap the first two axes so that ``vectorized_map`` iterates over
        # what was the second axis of ``y_pred``; the other axes keep their
        # order.
        permutation = (1, 0) + tuple(range(2, len(y_pred.shape)))
        per_model_losses = tf.vectorized_map(compute_loss_single_model,
                                             tf.transpose(y_pred, permutation))
        return tf.reduce_mean(per_model_losses)

    def fit(self, X, y, start_averaging=10, learning_rate=0.001, update_frequency=1, K=10, **kwargs):
        """Train the model with the MultiSWAG algorithm.

        It's the same function as in :class:`SWAGModel` but with multiple
        models trained independently. The models are trained in two parts :

        * Before ``start_averaging`` epochs, the models are trained normally.
          It's defined as the pretraining of the models and the training uses
          the optimizer and learning rate specified in the ``compile``
          function.

        * After ``start_averaging`` epochs, the models are trained with the
          SWAG callback. In other words, at the end of specific epochs
          (according to parameters), the parameters of the models are saved.
          At the end of the training, the callback computes the parameters of
          the approximated posterior gaussian distribution. The parameters are
          then used in ``_sample_prediction`` in order to sample different
          predictions. At present, the optimizer is necessarily the SGD
          optimizer.

        For more details, see :class:`SWAGCallback`.

        Parameters
        ----------
        X: np.ndarray
            The input data.
        y: np.ndarray
            The target data.
        start_averaging: int
            The number of epochs to pretrain the model.
        learning_rate: float
            The learning rate of the MultiSWAG algorithm (second part).
        update_frequency: int
            The number of epochs between each save of parameters of the
            MultiSWAG algorithm.
        K: int
            The number of samples used to compute the covariance matrix.
        **kwargs
            Forwarded to the parent ``fit`` for both parts. ``epochs`` (the
            total number of epochs, pretraining included) is required.
            ``callbacks``, when given, is expected to be a list of callbacks;
            it is used for both parts, and the SWAG callback is appended to
            it for the second part.

        Returns
        -------
        History of the MultiSWAG's training (second part only).

        Raises
        ------
        KeyError
            If ``epochs`` is not given.
        """
        # Store the number of epochs given in kwargs and remove it from kwargs
        epochs = kwargs.pop('epochs')

        # User callbacks are taken out of kwargs: passing them through
        # ``**kwargs`` next to an explicit ``callbacks=`` argument raised a
        # TypeError in the second part.
        user_callbacks = kwargs.pop('callbacks', None)
        pretraining_kwargs = dict(kwargs)
        if user_callbacks is not None:
            pretraining_kwargs['callbacks'] = user_callbacks

        # Make the pretraining of the model with the specified optimizer
        if kwargs.get("verbose") == 1:
            print("############ Pretraining ############")
        super().fit(X, y, epochs=start_averaging, **pretraining_kwargs)

        # Make the epochs with the SWAG strategy
        if kwargs.get("verbose") == 1:
            print("############ MultiSWAG algorithm ############")
        self.compile(loss=self.loss,
                     optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
                     metrics=self.compiled_metrics._metrics,
                     stochastic_metrics=self.stochastic_metrics)
        swag_callbacks = [] if user_callbacks is None else list(user_callbacks)
        swag_callbacks.append(SWAGCallback(learning_rate, update_frequency, K))
        return super().fit(X, y, epochs=epochs - start_averaging,
                           callbacks=swag_callbacks, **kwargs)

    def _draw_posterior_weights(self, rng, shapes):
        """Draw one set of weights from the approximated gaussian posterior.

        Parameters
        ----------
        rng : np.random.Generator
            Random generator used for the two standard normal draws.
        shapes : list of tuple
            Shape of each weight tensor of the model, in ``get_weights`` order.

        Returns
        -------
        list of np.ndarray
            The sampled weights, reshaped to ``shapes``.
        """
        sampled_weights = []
        for i, shape in enumerate(shapes):
            mean = self.SWA_weights[i]
            deviations = self.deviation_matrix[i]
            n_columns = deviations.shape[1]

            # Diagonal part of the covariance
            sample = mean + self.SWA_cov[i] * rng.standard_normal(mean.shape[0]) / np.sqrt(2)

            # Low-rank part of the covariance. With a single column the
            # normalisation 1/sqrt(2*(K-1)) is a division by zero (the only
            # column produced by the callback is then all zeros, giving NaN),
            # so the term is skipped.
            if n_columns > 1:
                sample = sample + (np.dot(deviations, rng.standard_normal(n_columns))
                                   / np.sqrt(2 * (n_columns - 1)))

            sampled_weights.append(sample.reshape(shape))
        return sampled_weights

    def _sample_prediction(self, data, S, verbose=0):
        """Sample predictions according to the posterior distribution of the parameters.

        It's the same function as in :class:`SWAGModel`.

        In the MultiSWAG algorithm, the posterior distribution of the
        parameters is approximated as a Gaussian Distribution. The mean and
        the covariance are specified in the report associated with the code
        or in the article. The mean has been stored in the variable
        ``SWA_weights``. The diagonal and the Kth-rank approximation of the
        covariance matrix have been stored respectively in ``SWA_cov`` and
        ``deviation_matrix``.

        The method samples the weights and computes the prediction associated
        multiple times for each model independently. The weights the model
        had before the call are put back once the sampling is over, even if a
        prediction fails.

        Parameters
        ----------
        data : tf.Tensor
            Input data (equivalent to x).
        S: int
            The number of samples used in the Monte Carlo method.
        verbose: int, default:0
            The verbosity level.

        Return
        ------
        preds : np.ndarray
            The batch of S predictions (float32), with the samples on the
            first axis.

        Raises
        ------
        ValueError
            If ``S`` is smaller than 1.
        """
        if S < 1:
            raise ValueError("S must be a positive integer, got {}.".format(S))

        final_weights = self.get_weights()
        shapes = [array.shape for array in final_weights]

        # Set up random generator
        rng = np.random.default_rng()

        preds = None
        try:
            # Bayesian Model Averaging
            for s in range(S):
                # Draw the value of the parameters and set them for the iteration
                self.set_weights(self._draw_posterior_weights(rng, shapes))

                # Compute the predictions
                pred_iter = super().predict(data, verbose=verbose)

                # Initialize the predictions
                if preds is None:
                    preds = np.zeros((S,) + pred_iter.shape, dtype=np.float32)

                # Store the predictions
                preds[s] = pred_iter
        finally:
            # Do not leave the model on the last random sample
            self.set_weights(final_weights)

        return preds

    def _combine_predictions(self, predictions, sampled, stacked):
        r""" Bayesian Model Averaging of the S predictions of the B models.

        It's a little bit different from the function in :class:`SWAGModel`.
        There are 2 cases :

        * If sampled is False, the parameters of the posterior distribution
          have not been computed yet and so it's impossible to sample
          predictions. Therefore, the function just combines the predictions
          made by all the models as in the :class:`DeepEnsembleModel`
          (average over axis 1).

        * If sampled is True, the parameters have been computed. So, this
          method follows the ``_sample_prediction`` method. It takes in input
          the batch of S predictions for each model sampled from
          ``_sample_prediction`` method. Then, it averages the predictions
          over the samples and the models (axes 0 and 2) in order to compute
          the mean and the uncertainty associated with the prediction.

        The computation of uncertainty and the mean prediction is different
        according to the structure of the network. For the moment, there are
        2 possibilities (B = number of models, S = number of samples) :

        * Mean Variance Activation (see method ``MeanVarianceActivation``):

          * Mean : :math:`\hat{\mu} = \dfrac{1}{B*S} \sum_{i=1}^{B} \sum_{j=1}^{S} \hat{\mu}_{i,j}`
          * Epistemic Variance : :math:`\hat{\sigma}^2_{epi} = \dfrac{1}{B*S} \sum_{i=1}^{B} \sum_{j=1}^{S} (\hat{\mu}_{i,j} - \hat{\mu})^2`
          * Aleatoric Variance : :math:`\hat{\sigma}^2_{alea} = \dfrac{1}{B*S} \sum_{i=1}^{B} \sum_{j=1}^{S} (\sigma^2_{i,j})`

        * No specific structure :

          * Mean : :math:`\hat{y} = \dfrac{1}{B*S} \sum_{i=1}^{B} \sum_{j=1}^{S} \hat{y}_{i,j}`
          * Variance : :math:`\hat{\sigma}^2 = \dfrac{1}{B*S} \sum_{i=1}^{B} \sum_{j=1}^{S} (\hat{y}_{i,j} - \hat{y})^2`

        In the future, it would be possible to add other possibilities.

        Parameters
        ----------
        predictions : tf.Tensor
            Batch of the S predictions for each model computed by
            ``_sample_prediction``.
        sampled : boolean
            Boolean to indicate whether the input has been sampled.
        stacked : boolean
            Boolean to indicate whether the output should be stacked in a
            single tensor or not.

        Returns
        -------
        If ``stacked`` is False, the tuple ``(mean, total variance)``.
        Otherwise a single numpy array whose last axis holds the mean
        followed by the variance(s) (epistemic then aleatoric in the
        mean-variance case).
        """
        average_axis = (0, 2) if sampled else (1, )

        # Case 1 : The Deep Ensemble outputs a variance and a mean for each model
        if self.layers[-1].get_config().get('activation') == 'MeanVarianceActivation':
            mean, variance = tf.unstack(predictions, axis=-1)
            mean_prediction = tf.reduce_mean(mean, axis=average_axis)
            # Variance of the individual means. Computed directly rather than
            # as E[x^2] - E[x]^2, which can come out slightly negative.
            mean_variance_epistemic = tf.math.reduce_variance(mean, axis=average_axis)
            mean_variance_aleatoric = tf.reduce_mean(variance, axis=average_axis)
            mean_variance = mean_variance_aleatoric + mean_variance_epistemic

            if not stacked:
                return mean_prediction, mean_variance
            return tf.stack((mean_prediction, mean_variance_epistemic, mean_variance_aleatoric),
                            axis=-1).numpy()

        # Case 2 : The Deep Ensemble has a standard structure
        mean_prediction = tf.reduce_mean(predictions, axis=average_axis)
        mean_variance = tf.math.reduce_variance(predictions, axis=average_axis)

        if not stacked:
            return mean_prediction, mean_variance
        return tf.stack((mean_prediction, mean_variance), axis=-1).numpy()

    def compute_metrics(self, x, y, prediction, sample_weight):
        """ Custom ``compute_metrics`` method.

        As stated in the parent method ``compute_metrics``, this method calls
        the parent function with the appropriate ``y_pred`` and
        ``stochastic_predictions`` arguments. The predictions of the models
        are combined without sampling (``sampled=False``).

        Arguments
        ---------
        x : tf.Tensor
            Input data.
        y : tf.Tensor
            Target data.
        prediction : tf.Tensor
            Predictions returned by the model (output of `model(x)`)
        sample_weight : optional
            Sample weights for weighting the loss function.

        Returns
        -------
        See parent method.
        """
        y_pred, stochastic_predictions = self._combine_predictions(prediction, sampled=False,
                                                                   stacked=False)
        return super().compute_metrics(x, y, y_pred, stochastic_predictions, sample_weight)

    def predict(self, data, S=5, verbose=0):
        """ Sample predictions and combine them.

        It's the same function as in :class:`SWAGModel`.

        This method defines the inference step of the MultiSWAG algorithm.
        First, it samples predictions of each model with the
        ``_sample_prediction`` method. Then, all the predictions are combined
        with the method ``_combine_predictions``.

        Parameters
        ----------
        data: np.ndarray
            The input data.
        S: int, default:5
            The number of samples used in the Monte Carlo method.
        verbose: int, default:0
            The verbosity level, forwarded to ``_sample_prediction``.

        Returns
        -------
        The predictions of the model.
        """
        # Sample and compute the predictions. ``verbose`` is forwarded; it
        # used to be silently replaced by 0.
        predictions = self._sample_prediction(data, S, verbose=verbose)

        # Combine predictions
        return self._combine_predictions(predictions, sampled=True, stacked=True)

    def evaluate(self, x=None, y=None, S=5, sample_weight=None):
        """ Custom ``evaluate`` method.

        It returns the loss value & metrics values for the model in test mode.

        Parameters
        ----------
        x : tf.Tensor
            Input data.
        y : tf.Tensor
            Target data
        S : int, default:5
            The number of samples used in the Monte Carlo method.
        sample_weight : optional
            Sample weights for weighting the loss function.

        Return
        ------
        Dict containing the values of the metrics and loss of the model.
        """
        # Sample and compute the predictions
        predictions = self._sample_prediction(x, S, verbose=0)

        # Combine predictions
        y_pred, stochastic_predictions = self._combine_predictions(predictions, sampled=True,
                                                                   stacked=False)

        metrics = super().compute_metrics(x, tf.convert_to_tensor(y), y_pred,
                                          stochastic_predictions, sample_weight)
        return keras.utils.tf_utils.sync_to_numpy_or_python_type(metrics)
def toMultiSWAG(net, nb_models):
    """Convert a regular model into a MultiSWAG model.

    This method intends to be a high-level interface to construct a MultiSWAG
    model from a regular model. ``net`` is first turned into a deep ensemble
    of ``nb_models`` models with ``toDeepEnsemble``; the MultiSWAG model is
    then built from the configuration of that ensemble.

    According to the original notes, only the densely-connected NN is
    compatible with a fully parallelizable implementation at present; other
    architectures are just concatenated models.

    Parameters
    ----------
    net : :class:`tf.keras.Sequential` or :class:`tf.keras.Model`
        a tensorflow model

    nb_models : int
        the number of models

    Return
    ------
    :class:`MultiSWAGModel`
        a MultiSWAG Model
    """
    ensemble_config = toDeepEnsemble(net, nb_models).get_config()
    return MultiSWAGModel.from_config(ensemble_config)