Source code for nifty8.minimization.stochastic_minimizer

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2021 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik.

from .energy import Energy
from .minimizer import Minimizer


class ADVIOptimizer(Minimizer):
    """Provide an implementation of an adaptive step-size sequence optimizer,
    following https://arxiv.org/abs/1603.00788.

    This stochastic optimizer keeps track of the evolution of the gradient over
    the last steps to adaptively determine the step-size of the next update.
    It is a variation of the Adam optimizer for Gaussian variational inference
    and it allows one to optimize stochastic loss functions.

    Parameters
    ----------
    controller : IterationController
        The iteration controller that decides when the optimization stops; it
        thereby bounds the number of consecutive steps taken during one call of
        the optimizer.
    eta : positive float
        The scale of the step-size sequence. It might have to be adapted to the
        application to increase performance. Default: 1.
    alpha : float between 0 and 1
        The fraction of how much the current gradient impacts the momentum.
        Lower values correspond to a longer memory.
    tau : positive float
        This quantity prevents division by zero.
    epsilon : positive float
        A small value that guarantees the Robbins and Monro conditions.
    resample : bool
        Whether the loss function is resampled for the next iteration.
        Stochastic losses require resampling, deterministic ones do not.
    """
    def __init__(self, controller, eta=1, alpha=0.1, tau=1, epsilon=1e-16,
                 resample=True):
        self.alpha = alpha
        self.eta = eta
        self.tau = tau
        self.epsilon = epsilon
        self.counter = 1
        self._controller = controller
        self.s = None
        self.resample = resample
    def _step(self, position, gradient):
        # Exponentially weighted moving average of the squared gradient.
        self.s = self.alpha * gradient ** 2 + (1 - self.alpha) * self.s
        # Decaying, per-degree-of-freedom step size of Robbins-Monro type.
        self.rho = self.eta * self.counter ** (-0.5 + self.epsilon) \
            / (self.tau + (self.s).sqrt())
        new_position = position - self.rho * gradient
        self.counter += 1
        return new_position
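    # For reference, the update implemented in _step above can be read off the
    # code as the adaptive step-size sequence of https://arxiv.org/abs/1603.00788:
    #
    #   s_k     = alpha * g_k**2 + (1 - alpha) * s_{k-1}
    #   rho_k   = eta * k**(-1/2 + epsilon) / (tau + sqrt(s_k))
    #   x_{k+1} = x_k - rho_k * g_k
    #
    # where g_k is the current gradient, s_k the gradient-magnitude memory and
    # k the step counter.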
    def __call__(self, energy):
        from ..utilities import myassert
        controller = self._controller
        status = controller.start(energy)
        if status != controller.CONTINUE:
            return energy, status
        if self.s is None:
            self.s = energy.gradient ** 2

        while True:
            # check if position is at a flat point
            if energy.gradient_norm == 0:
                return energy, controller.CONVERGED

            x = self._step(energy.position, energy.gradient)
            if self.resample:
                energy = energy.resample_at(x)
            myassert(isinstance(energy, Energy))
            myassert(x.domain is energy.position.domain)
            energy = energy.at(x)
            status = self._controller.check(energy)
            if status != controller.CONTINUE:
                return energy, status
    def reset(self):
        self.counter = 1
        self.s = None
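# A minimal usage sketch (an editorial illustration, not part of the module):
# assuming `kl` is an Energy whose value and gradient are stochastic estimates
# (e.g. a sampled Kullback-Leibler divergence), the optimizer is driven like
# any other NIFTy minimizer; `GradientNormController` is just one possible
# choice of iteration controller here.
#
#     import nifty8 as ift
#     ic = ift.GradientNormController(iteration_limit=100)
#     minimizer = ift.ADVIOptimizer(ic, eta=1, resample=True)
#     kl, status = minimizer(kl)  # returns the updated energy and a status flag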