# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2019, 2021 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik.
import numpy as np
from ..logger import logger
from ..utilities import NiftyMeta
[docs]
class LineEnergy:
"""Evaluates an underlying Energy along a certain line direction.
Given an Energy class and a line direction, its position is parametrized by
a scalar step size along the descent direction relative to a zero point.
Parameters
----------
line_position : float
Defines the full spatial position of this energy via
self.energy.position = zero_point + line_position*line_direction
energy : Energy
The Energy object which will be evaluated along the given direction.
line_direction : :class:`nifty8.field.Field`
Direction used for line evaluation. Does not have to be normalized.
offset : float *optional*
Indirectly defines the zero point of the line via the equation
energy.position = zero_point + offset*line_direction
(default : 0.).
Notes
-----
The LineEnergy is used in minimization schemes in order perform line
searches. It describes an underlying Energy which is restricted along one
direction, only requiring the step size parameter to determine a new
position.
"""
[docs]
def __init__(self, line_position, energy, line_direction, offset=0.):
self._line_position = float(line_position)
self._line_direction = line_direction
if self._line_position == float(offset):
self._energy = energy
else:
pos = energy.position \
+ (self._line_position-float(offset))*self._line_direction
self._energy = energy.at(position=pos)
[docs]
def at(self, line_position):
"""Returns LineEnergy at new position, memorizing the zero point.
Parameters
----------
line_position : float
Parameter for the new position on the line direction.
Returns
-------
LineEnergy object at new position with same zero point as `self`.
"""
return LineEnergy(line_position, self._energy, self._line_direction,
offset=self._line_position)
@property
def energy(self):
"""
Energy : The underlying Energy object
"""
return self._energy
@property
def value(self):
"""
float : The value of the energy functional at given `position`.
"""
return self._energy.value
@property
def directional_derivative(self):
"""
float : The directional derivative at the given `position`.
"""
res = self._energy.gradient.s_vdot(self._line_direction)
if abs(res.imag) / max(abs(res.real), 1.) > 1e-12:
from ..logger import logger
logger.warning("directional derivative has non-negligible "
"imaginary part: {}".format(res))
return res.real
[docs]
class LineSearch(metaclass=NiftyMeta):
"""Class for finding a step size that satisfies the strong Wolfe
conditions.
Algorithm contains two stages. It begins with a trial step length and
keeps increasing it until it finds an acceptable step length or an
interval. If it does not satisfy the Wolfe conditions, it performs the Zoom
algorithm (second stage). By interpolating it decreases the size of the
interval until an acceptable step length is found.
Parameters
----------
preferred_initial_step_size : float, optional
Newton-based methods should intialize this to 1.
c1 : float
Parameter for Armijo condition rule. Default: 1e-4.
c2 : float
Parameter for curvature condition rule. Default: 0.9.
max_step_size : float
Maximum step allowed in to be made in the descent direction.
Default: 1e30.
max_iterations : int, optional
Maximum number of iterations performed by the line search algorithm.
Default: 100.
max_zoom_iterations : int, optional
Maximum number of iterations performed by the zoom algorithm.
Default: 100.
"""
[docs]
def __init__(self, preferred_initial_step_size=None, c1=1e-4, c2=0.9,
max_step_size=1e30, max_iterations=100,
max_zoom_iterations=100):
self.preferred_initial_step_size = preferred_initial_step_size
self.c1 = float(c1)
self.c2 = float(c2)
self.max_step_size = max_step_size
self.max_iterations = int(max_iterations)
self.max_zoom_iterations = int(max_zoom_iterations)
def _zoom(self, alpha_lo, alpha_hi, phi_0, phiprime_0,
phi_lo, phiprime_lo, phi_hi, le_0):
"""Performs the second stage of the line search algorithm.
If the first stage was not successful then the Zoom algorithm tries to
find a suitable step length by using bisection, quadratic, cubic
interpolation.
Parameters
----------
alpha_lo : float
A boundary for the step length interval.
Fulfills Wolfe condition 1.
alpha_hi : float
The other boundary for the step length interval.
phi_0 : float
Value of the energy at the starting point of the line search
algorithm.
phiprime_0 : float
directional derivative at the starting point of the line search
algorithm.
phi_lo : float
Value of the energy if we perform a step of length alpha_lo in
descent direction.
phiprime_lo : float
directional derivative at the new position if we perform a step of
length alpha_lo in descent direction.
phi_hi : float
Value of the energy if we perform a step of length alpha_hi in
descent direction.
Returns
-------
Energy
The new Energy object on the new position.
"""
cubic_delta = 0.2 # cubic interpolant checks
quad_delta = 0.1 # quadratic interpolant checks
alpha_recent = None
phi_recent = None
if phi_lo > phi_0 + self.c1*alpha_lo*phiprime_0:
raise ValueError("inconsistent data")
if phiprime_lo*(alpha_hi-alpha_lo) >= 0.:
raise ValueError("inconsistent data")
for i in range(self.max_zoom_iterations):
# myassert(phi_lo <= phi_0 + self.c1*alpha_lo*phiprime_0)
# myassert(phiprime_lo*(alpha_hi-alpha_lo)<0.)
delta_alpha = alpha_hi - alpha_lo
a, b = min(alpha_lo, alpha_hi), max(alpha_lo, alpha_hi)
# Try cubic interpolation
if i > 0:
cubic_check = cubic_delta * delta_alpha
alpha_j = self._cubicmin(alpha_lo, phi_lo, phiprime_lo,
alpha_hi, phi_hi,
alpha_recent, phi_recent)
# If cubic was not successful or not available, try quadratic
if (i == 0) or (alpha_j is None) or (alpha_j > b - cubic_check) or\
(alpha_j < a + cubic_check):
quad_check = quad_delta * delta_alpha
alpha_j = self._quadmin(alpha_lo, phi_lo, phiprime_lo,
alpha_hi, phi_hi)
# If quadratic was not successful, try bisection
if (alpha_j is None) or (alpha_j > b - quad_check) or \
(alpha_j < a + quad_check):
alpha_j = alpha_lo + 0.5*delta_alpha
# Check if the current value of alpha_j is already sufficient
le_alphaj = le_0.at(alpha_j)
phi_alphaj = le_alphaj.value
# If the first Wolfe condition is not met replace alpha_hi
# by alpha_j
if (phi_alphaj > phi_0 + self.c1*alpha_j*phiprime_0) or \
(phi_alphaj >= phi_lo):
alpha_recent, phi_recent = alpha_hi, phi_hi
alpha_hi, phi_hi = alpha_j, phi_alphaj
else:
phiprime_alphaj = le_alphaj.directional_derivative
# If the second Wolfe condition is met, return the result
if abs(phiprime_alphaj) <= -self.c2*phiprime_0:
return le_alphaj.energy, True
# If not, check the sign of the slope
if phiprime_alphaj*delta_alpha >= 0:
alpha_recent, phi_recent = alpha_hi, phi_hi
alpha_hi, phi_hi = alpha_lo, phi_lo
else:
alpha_recent, phi_recent = alpha_lo, phi_lo
# Replace alpha_lo by alpha_j
(alpha_lo, phi_lo, phiprime_lo) = (alpha_j, phi_alphaj,
phiprime_alphaj)
else:
logger.warning(
"The line search algorithm (zoom) did not converge.")
return le_alphaj.energy, False
def _cubicmin(self, a, fa, fpa, b, fb, c, fc):
"""Estimating the minimum with cubic interpolation.
Finds the minimizer for a cubic polynomial that goes through the
points (a,a), (b,fb), and (c,fc) with derivative at point a of fpa.
If no minimizer can be found return None
Parameters
----------
a, fa, fpa : float
abscissa, function value and derivative at first point
b, fb : float
abscissa and function value at second point
c, fc : float
abscissa and function value at third point
Returns
-------
xmin : float
Position of the approximated minimum.
"""
with np.errstate(divide='raise', over='raise', invalid='raise'):
try:
C = fpa
db = b - a
dc = c - a
denom = db * db * dc * dc * (db - dc)
d1 = np.empty((2, 2))
d1[0, 0] = dc * dc
d1[0, 1] = -(db*db)
d1[1, 0] = -(dc*dc*dc)
d1[1, 1] = db*db*db
[A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
fc - fa - C * dc]).ravel())
A /= denom
B /= denom
radical = B * B - 3 * A * C
xmin = a + (-B + np.sqrt(radical)) / (3 * A)
except ArithmeticError:
return None
if not np.isfinite(xmin):
return None
return xmin
def _quadmin(self, a, fa, fpa, b, fb):
"""Estimating the minimum with quadratic interpolation.
Finds the minimizer for a quadratic polynomial that goes through
the points (a,fa), (b,fb) with derivative at point a of fpa.
Parameters
----------
a, fa, fpa : float
abscissa, function value and derivative at first point
b, fb : float
abscissa and function value at second point
Returns
-------
xmin : float
Position of the approximated minimum.
"""
with np.errstate(divide='raise', over='raise', invalid='raise'):
try:
db = b - a * 1.0
B = (fb - fa - fpa * db) / (db * db)
xmin = a - fpa / (2.0 * B)
except ArithmeticError:
return None
if not np.isfinite(xmin):
return None
return xmin