Source code for nifty8.minimization.stochastic_minimizer
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2021 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik.
from .energy import Energy
from .minimizer import Minimizer
class ADVIOptimizer(Minimizer):
"""Provide an implementation of an adaptive step-size sequence optimizer,
following https://arxiv.org/abs/1603.00788.
This stochastic optimizer keeps track of the evolution of the gradient over
the last steps to adaptively determine the step-size of the next update.
It is a variation of the Adam optimizer for Gaussian variational inference
and it allows to optimizer stochastic loss functions.
Parameters
----------
steps: int
The number of concecutive steps during one call of the optimizer.
eta: positive float
The scale of the step-size sequence. It might have to be adapted to the
application to increase performance. Default: 1.
alpha: float between 0 and 1
The fraction of how much the current gradient impacts the momentum.
Lower values correspond to a longer memory.
tau: positive float
This quantity prevents division by zero.
epsilon: positive float
A small value guarantees Robbins and Monro conditions.
resample: bool
Whether the loss function is resampled for the next iteration.
Stochastic losses require resampleing, deterministic ones not.
"""

    def __init__(self, controller, eta=1, alpha=0.1, tau=1, epsilon=1e-16, resample=True):
        self.alpha = alpha
        self.eta = eta
        self.tau = tau
        self.epsilon = epsilon
        self.counter = 1  # iteration index t of the step-size sequence
        self._controller = controller
        self.s = None  # moving average of the squared gradient, set on first call
        self.resample = resample

    def _step(self, position, gradient):
        # Exponentially weighted moving average of the squared gradient.
        self.s = self.alpha * gradient ** 2 + (1 - self.alpha) * self.s
        # Step-size sequence; the small exponent offset epsilon ensures the
        # Robbins-Monro conditions hold.
        self.rho = self.eta * self.counter ** (-0.5 + self.epsilon) \
            / (self.tau + (self.s).sqrt())
        new_position = position - self.rho * gradient
        self.counter += 1
        return new_position
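
    # In scalar notation, the update implemented in _step reads
    #
    #   s_t   = alpha * g_t**2 + (1 - alpha) * s_{t-1}
    #   rho_t = eta * t**(-1/2 + epsilon) / (tau + sqrt(s_t))
    #   x_t   = x_{t-1} - rho_t * g_t
    #
    # which is the adaptive step-size sequence of Kucukelbir et al.,
    # https://arxiv.org/abs/1603.00788.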

    def __call__(self, energy):
        from ..utilities import myassert
        controller = self._controller
        status = controller.start(energy)
        if status != controller.CONTINUE:
            return energy, status
        if self.s is None:
            # Initialize the squared-gradient average with the first gradient.
            self.s = energy.gradient ** 2
        while True:
            # Check if the position is at a flat point.
            if energy.gradient_norm == 0:
                return energy, controller.CONVERGED
            x = self._step(energy.position, energy.gradient)
            if self.resample:
                # Draw new samples of the stochastic loss at the new position.
                energy = energy.resample_at(x)
            myassert(isinstance(energy, Energy))
            myassert(x.domain is energy.position.domain)
            energy = energy.at(x)
            status = self._controller.check(energy)
            if status != controller.CONTINUE:
                return energy, status

    def reset(self):
        """Reset the optimizer to its initial state."""
        self.counter = 1
        self.s = None
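

# ---------------------------------------------------------------------------
# Illustrative sketch (hypothetical, not part of the NIFTy API): the adaptive
# step-size sequence above, reproduced with plain numpy on the toy quadratic
# loss 0.5 * ||x||^2. The names `toy_gradient` and `adaptive_descent` exist
# only for this demonstration; a real application would pass an Energy and an
# IterationController to ADVIOptimizer instead.
if __name__ == "__main__":
    import numpy as np

    def toy_gradient(x):
        # Gradient of the toy loss 0.5 * ||x||^2.
        return x

    def adaptive_descent(x, steps=100, eta=1.0, alpha=0.1, tau=1.0,
                         epsilon=1e-16):
        # Mirror of ADVIOptimizer: s is seeded with the first squared
        # gradient, then updated as a moving average at every step.
        s = toy_gradient(x) ** 2
        for t in range(1, steps + 1):
            g = toy_gradient(x)
            s = alpha * g ** 2 + (1 - alpha) * s
            rho = eta * t ** (-0.5 + epsilon) / (tau + np.sqrt(s))
            x = x - rho * g
        return x

    x0 = np.array([3.0, -2.0])
    # The iterates approach the minimizer at the origin.
    print(adaptive_descent(x0))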