Source code for pints._mcmc._dual_averaging

#
# Dual Averaging step size and mass matrix adaption method for NUTS and HMC
# samplers
#
# This file is part of PINTS (https://github.com/pints-team/pints/) which is
# released under the BSD 3-clause license. See accompanying LICENSE.md for
# copyright notice and full license details.
#
import numpy as np


[docs]class DualAveragingAdaption(object): r""" Dual Averaging method to adaptively tune the step size and mass matrix of a Hamiltonian Monte Carlo (HMC) routine (as used e.g. in NUTS). Implements a Dual Averaging scheme to adapt the step size ``epsilon``, as per [1]_ (section 3.2.1 and algorithm 6), and estimates the inverse mass matrix using the sample covariance of the accepted parameter, as suggested in [2]_. The mass matrix can either be given as a fully dense matrix represented as a 2D ndarray, or a diagonal matrix represented as a 1D ndarray. During iteration ``m`` of adaption, the parameter ``epsilon`` is updated using the following scheme: .. math:: \bar{H} = (1 - 1/(m + t_0)) \bar{H} + 1/(m + t_0)(\delta_t - \delta) \text{log} \epsilon = \mu - \sqrt{m}/\gamma \bar{H} where $\delta_t$ is the target acceptence probability set by the user and $\delta$ is the acceptence probability reported by the algorithm (i.e. that is provided as an argument to the :meth:`step` method. The adaption is done using the same windowing method employed by Stan, which is done over three or more windows: - initial window: epsilon is adapted using dual averaging (*no* adaption of the mass matrix). - base window: epsilon continues to be adapted using dual averaging; this adaption completes at the end of this window. The inverse mass matrix is adaped at the end of the window by taking the sample covariance of all parameter points within this window. - terminal window: epsilon is adapted using dual averaging, holding the mass matrix constant, and completes at the end of the window. If the number of warmup steps requested by the user is greater than the sum of these three windows, then additional base windows are added, each with a size double that of the previous window. References ---------- .. [1] Hoffman, M. D., & Gelman, A. (2014). The No-U-Turn sampler: adaptively setting path lengths in Hamiltonian Monte Carlo. Journal of Machine Learning Research, 15(1), 1593-1623. .. [2] Betancourt, M. (2018). A Conceptual Introduction to Hamiltonian Monte Carlo. https://arxiv.org/abs/1701.02434. Parameters ---------- num_warmup_steps ??? target_accept_prob ??? init_epsilon An initial guess for the step size epsilon init_inv_mass_matrix An initial guess for the inverse adapted mass matrix """ def __init__(self, num_warmup_steps, target_accept_prob, init_epsilon, init_inv_mass_matrix): # windowing constants (defaults taken from Stan) self._initial_window = 75 self._base_window = 25 self._terminal_window = 50 # windowing counter self._counter = 0 # dual averaging constants (defaults taken from Stan) self._gamma = 0.05 self._t0 = 10.0 self._kappa = 0.75 # variables for dual averaging self._epsilon = init_epsilon # The adapted step size self._mass_matrix = None # The adapted mass matrix (set below) self._inv_mass_matrix = None # The inverse adapted mass matrix self._mu = np.log(10 * self._epsilon) self._log_epsilon_bar = np.log(1) self._h_bar = 0.0 self._adapt_epsilon_counter = 0 self.set_inv_mass_matrix(np.copy(init_inv_mass_matrix)) self._target_accept_prob = target_accept_prob minimum_warmup_steps = self._initial_window + self._terminal_window + \ self._base_window if num_warmup_steps < minimum_warmup_steps: raise ValueError( 'Number of warmup steps less than the minimum value {}'. format(minimum_warmup_steps) ) self._warmup_steps = num_warmup_steps self._next_window = self._initial_window + self._base_window self._adapting = True self.init_sample_covariance(self._base_window) self.init_adapt_epsilon(init_epsilon)
[docs] def adapt_epsilon(self, accept_prob): """ Perform a single step of the dual averaging scheme. """ if accept_prob > 1: accept_prob = 1.0 self._adapt_epsilon_counter += 1 counter = self._adapt_epsilon_counter eta = 1.0 / (counter + self._t0) self._h_bar = (1 - eta) * self._h_bar \ + eta * (self._target_accept_prob - accept_prob) log_epsilon = ( self._mu - (np.sqrt(counter) / self._gamma) * self._h_bar) x_eta = counter**(-self._kappa) self._log_epsilon_bar = x_eta * log_epsilon + \ (1 - x_eta) * self._log_epsilon_bar self._epsilon = np.exp(log_epsilon)
[docs] def add_parameter_sample(self, sample): """ Store the parameter samples to calculate a sample covariance matrix later on. """ self._samples[:, self._num_samples] = sample self._num_samples += 1
[docs] def calculate_sample_variance(self): """ Return the sample covariance of all the stored samples. """ assert self._num_samples == self._samples.shape[1] params = self._samples.shape[0] samples = self._samples.shape[1] if self._inv_mass_matrix.ndim == 1: sample_covariance = np.var(self._samples, axis=1) identity = np.ones(params) else: sample_covariance = np.cov(self._samples) identity = np.eye(params) # adapt the sample covariance in a similar way to Stan return (samples / (samples + 5.0)) * sample_covariance \ + 1e-3 * (5.0 / (samples + 5.0)) * identity
[docs] def final_epsilon(self): """ Perform the final step of the dual averaging scheme. """ return np.exp(self._log_epsilon_bar)
[docs] def get_epsilon(self): """ return the step size """ return self._epsilon
def get_inv_mass_matrix(self): return self._inv_mass_matrix
[docs] def get_mass_matrix(self): """ Return the mass matrix. """ return self._mass_matrix
[docs] def init_adapt_epsilon(self, epsilon): """ Start a new dual averaging adaption for epsilon. """ self._epsilon = epsilon self._mu = np.log(10 * self._epsilon) self._log_epsilon_bar = np.log(1) self._h_bar = 0.0 self._adapt_epsilon_counter = 0
[docs] def init_sample_covariance(self, size): """ Start a new adaption window for the inverse mass matrix. """ n_params = self._inv_mass_matrix.shape[0] self._samples = np.empty((n_params, size)) self._num_samples = 0
[docs] def set_inv_mass_matrix(self, inv_mass_matrix): """ We calculate the mass matrix whenever the inverse mass matrix is set. """ if inv_mass_matrix.ndim == 1: self._mass_matrix = 1.0 / inv_mass_matrix self._inv_mass_matrix = inv_mass_matrix else: try: self._mass_matrix = np.linalg.inv(inv_mass_matrix) except np.linalg.LinAlgError: print('WARNING: adapted mass matrix is ill-conditioned') return self._inv_mass_matrix = inv_mass_matrix
[docs] def step(self, x, accept_prob): """ Perform a single step of the adaption. Parameters ---------- x: ndarray The next accepted mcmc parameter point. accept_prob: float The acceptance probability of the last NUTS/HMC mcmc step. """ if not self._adapting: return self._counter += 1 if self._counter >= self._warmup_steps: self._epsilon = self.final_epsilon() self._adapting = False return False self.adapt_epsilon(accept_prob) if self._counter > self._initial_window: self.add_parameter_sample(x) if self._counter >= self._next_window: self.set_inv_mass_matrix(self.calculate_sample_variance()) if self._counter >= self._warmup_steps - self._terminal_window: self._next_window = self._warmup_steps else: self._base_window *= 2 self._next_window = min( self._counter + self._base_window, self._warmup_steps - self._terminal_window ) self.init_sample_covariance(self._next_window - self._counter) return True #self._epsilon = self.final_epsilon() return False