Follow-up: livre

This commit is contained in:
parent b4b4398bb0
commit 3a7a3849ae

12242 changed files with 2564461 additions and 6914 deletions

@@ -0,0 +1 @@
from .optimize cimport cython_optimize

460 venv/lib/python3.13/site-packages/scipy/optimize/__init__.py Normal file

@@ -0,0 +1,460 @@
"""
|
||||
=====================================================
|
||||
Optimization and root finding (:mod:`scipy.optimize`)
|
||||
=====================================================
|
||||
|
||||
.. currentmodule:: scipy.optimize
|
||||
|
||||
.. toctree::
|
||||
:hidden:
|
||||
|
||||
optimize.cython_optimize
|
||||
|
||||
SciPy ``optimize`` provides functions for minimizing (or maximizing)
|
||||
objective functions, possibly subject to constraints. It includes
|
||||
solvers for nonlinear problems (with support for both local and global
|
||||
optimization algorithms), linear programming, constrained
|
||||
and nonlinear least-squares, root finding, and curve fitting.
|
||||
|
||||
Common functions and objects, shared across different solvers, are:
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
show_options - Show specific options optimization solvers.
|
||||
OptimizeResult - The optimization result returned by some optimizers.
|
||||
OptimizeWarning - The optimization encountered problems.
|
||||
|
||||
|
||||
Optimization
|
||||
============
|
||||
|
||||
Scalar functions optimization
|
||||
-----------------------------
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
minimize_scalar - Interface for minimizers of univariate functions
|
||||
|
||||
The `minimize_scalar` function supports the following methods:
|
||||
|
||||
.. toctree::
|
||||
|
||||
optimize.minimize_scalar-brent
|
||||
optimize.minimize_scalar-bounded
|
||||
optimize.minimize_scalar-golden
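
For example, a minimal sketch (the quadratic here is invented purely for
illustration):

>>> from scipy.optimize import minimize_scalar
>>> res = minimize_scalar(lambda x: (x - 2)**2)
>>> round(float(res.x), 6)
2.0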

Local (multivariate) optimization
---------------------------------

.. autosummary::
   :toctree: generated/

   minimize - Interface for minimizers of multivariate functions.

The `minimize` function supports the following methods:

.. toctree::

   optimize.minimize-neldermead
   optimize.minimize-powell
   optimize.minimize-cg
   optimize.minimize-bfgs
   optimize.minimize-newtoncg
   optimize.minimize-lbfgsb
   optimize.minimize-tnc
   optimize.minimize-cobyla
   optimize.minimize-cobyqa
   optimize.minimize-slsqp
   optimize.minimize-trustconstr
   optimize.minimize-dogleg
   optimize.minimize-trustncg
   optimize.minimize-trustkrylov
   optimize.minimize-trustexact

Constraints are passed to the `minimize` function as a single object or
as a list of objects from the following classes:

.. autosummary::
   :toctree: generated/

   NonlinearConstraint - Class defining general nonlinear constraints.
   LinearConstraint - Class defining general linear constraints.

Simple bound constraints are handled separately and there is a special class
for them:

.. autosummary::
   :toctree: generated/

   Bounds - Bound constraints.
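
As a minimal sketch (objective and numbers invented for illustration),
constraints and bounds can be combined like so; the optimum of this toy
problem lies on the linear constraint:

>>> import numpy as np
>>> from scipy.optimize import minimize, LinearConstraint, Bounds
>>> objective = lambda x: (x[0] - 1)**2 + (x[1] - 2.5)**2
>>> lc = LinearConstraint([[1, 1]], ub=3)   # x[0] + x[1] <= 3
>>> bnds = Bounds(0, np.inf)                # all variables non-negative
>>> res = minimize(objective, [2, 0], method='trust-constr',
...                constraints=[lc], bounds=bnds)
>>> np.round(res.x, 3)
array([0.75, 2.25])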

Quasi-Newton strategies implementing the `HessianUpdateStrategy`
interface can be used to approximate the Hessian in the `minimize`
function (available only for the 'trust-constr' method). Available
quasi-Newton methods implementing this interface are:

.. autosummary::
   :toctree: generated/

   BFGS - Broyden-Fletcher-Goldfarb-Shanno (BFGS) Hessian update strategy.
   SR1 - Symmetric-rank-1 Hessian update strategy.

.. _global_optimization:

Global optimization
-------------------

.. autosummary::
   :toctree: generated/

   basinhopping - Basinhopping stochastic optimizer.
   brute - Brute force searching optimizer.
   differential_evolution - Stochastic optimizer using differential evolution.

   shgo - Simplicial homology global optimizer.
   dual_annealing - Dual annealing stochastic optimizer.
   direct - DIRECT (Dividing Rectangles) optimizer.
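
For instance, a minimal sketch using `differential_evolution` on the
Rosenbrock function; with the default polishing step the solver should land
at the known minimum ``[1, 1]`` to well within the rounding shown:

>>> import numpy as np
>>> from scipy.optimize import differential_evolution, rosen
>>> result = differential_evolution(rosen, bounds=[(-5, 5), (-5, 5)])
>>> np.round(result.x, 3)
array([1., 1.])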

Least-squares and curve fitting
===============================

Nonlinear least-squares
-----------------------

.. autosummary::
   :toctree: generated/

   least_squares - Solve a nonlinear least-squares problem with bounds on the variables.

Linear least-squares
--------------------

.. autosummary::
   :toctree: generated/

   nnls - Linear least-squares problem with non-negativity constraint.
   lsq_linear - Linear least-squares problem with bound constraints.
   isotonic_regression - Least squares problem of isotonic regression via PAVA.

Curve fitting
-------------

.. autosummary::
   :toctree: generated/

   curve_fit -- Fit a curve to a set of points.
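
A minimal sketch (model and noise level invented for illustration); the
fitted parameters should come back close to the true values ``a=2.5``,
``b=1.3``:

>>> import numpy as np
>>> from scipy.optimize import curve_fit
>>> def model(x, a, b):
...     return a * np.exp(-b * x)
>>> xdata = np.linspace(0, 4, 50)
>>> rng = np.random.default_rng(1234)
>>> ydata = model(xdata, 2.5, 1.3) + 0.01 * rng.normal(size=xdata.size)
>>> popt, pcov = curve_fit(model, xdata, ydata, p0=[2.0, 1.0])
>>> np.round(popt, 1)
array([2.5, 1.3])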

Root finding
============

Scalar functions
----------------
.. autosummary::
   :toctree: generated/

   root_scalar - Unified interface for nonlinear solvers of scalar functions.
   brentq - Brent's method with quadratic interpolation.
   brenth - Brent's method, modified by Harris with hyperbolic extrapolation.
   ridder - Ridder's method.
   bisect - Bisection method.
   newton - Newton's method (also Secant and Halley's methods).
   toms748 - Alefeld, Potra & Shi Algorithm 748.
   RootResults - The root finding result returned by some root finders.

The `root_scalar` function supports the following methods:

.. toctree::

   optimize.root_scalar-brentq
   optimize.root_scalar-brenth
   optimize.root_scalar-bisect
   optimize.root_scalar-ridder
   optimize.root_scalar-newton
   optimize.root_scalar-toms748
   optimize.root_scalar-secant
   optimize.root_scalar-halley


The table below lists situations and appropriate methods, along with
*asymptotic* convergence rates per iteration (and per function evaluation)
for successful convergence to a simple root(*).
Bisection is the slowest of them all, adding one bit of accuracy for each
function evaluation, but is guaranteed to converge.
The other bracketing methods all (eventually) increase the number of accurate
bits by about 50% for every function evaluation.
The derivative-based methods, all built on `newton`, can converge quite quickly
if the initial value is close to the root. They can also be applied to
functions defined on (a subset of) the complex plane.

+-------------+----------+----------+-----------+-------------+-------------+----------------+
| Domain of f | Bracket? |   Derivatives?       |   Solvers   |         Convergence          |
+             +          +----------+-----------+             +-------------+----------------+
|             |          | `fprime` | `fprime2` |             | Guaranteed? |  Rate(s)(*)    |
+=============+==========+==========+===========+=============+=============+================+
| `R`         | Yes      | N/A      | N/A       | - bisection | - Yes       | - 1 "Linear"   |
|             |          |          |           | - brentq    | - Yes       | - >=1, <= 1.62 |
|             |          |          |           | - brenth    | - Yes       | - >=1, <= 1.62 |
|             |          |          |           | - ridder    | - Yes       | - 2.0 (1.41)   |
|             |          |          |           | - toms748   | - Yes       | - 2.7 (1.65)   |
+-------------+----------+----------+-----------+-------------+-------------+----------------+
| `R` or `C`  | No       | No       | No        | secant      | No          | 1.62 (1.62)    |
+-------------+----------+----------+-----------+-------------+-------------+----------------+
| `R` or `C`  | No       | Yes      | No        | newton      | No          | 2.00 (1.41)    |
+-------------+----------+----------+-----------+-------------+-------------+----------------+
| `R` or `C`  | No       | Yes      | Yes       | halley      | No          | 3.00 (1.44)    |
+-------------+----------+----------+-----------+-------------+-------------+----------------+
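
As a quick illustration of the bracketing versus derivative-based interfaces
(a minimal sketch; ``x**3 - 1`` has its simple real root at 1):

>>> import numpy as np
>>> from scipy.optimize import root_scalar
>>> f = lambda x: x**3 - 1
>>> sol = root_scalar(f, bracket=[0, 2], method='brentq')
>>> newt = root_scalar(f, x0=0.5, fprime=lambda x: 3*x**2, method='newton')
>>> bool(np.isclose(sol.root, 1.0)), bool(np.isclose(newt.root, 1.0))
(True, True)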

.. seealso::

   `scipy.optimize.cython_optimize` -- Typed Cython versions of root finding functions

Fixed point finding:

.. autosummary::
   :toctree: generated/

   fixed_point - Single-variable fixed-point solver.

Multidimensional
----------------

.. autosummary::
   :toctree: generated/

   root - Unified interface for nonlinear solvers of multivariate functions.

The `root` function supports the following methods:

.. toctree::

   optimize.root-hybr
   optimize.root-lm
   optimize.root-broyden1
   optimize.root-broyden2
   optimize.root-anderson
   optimize.root-linearmixing
   optimize.root-diagbroyden
   optimize.root-excitingmixing
   optimize.root-krylov
   optimize.root-dfsane

Elementwise Minimization and Root Finding
=========================================

.. toctree::
   :maxdepth: 3

   optimize.elementwise

Linear programming / MILP
=========================

.. autosummary::
   :toctree: generated/

   milp -- Mixed integer linear programming.
   linprog -- Unified interface for minimizers of linear programming problems.

The `linprog` function supports the following methods:

.. toctree::

   optimize.linprog-simplex
   optimize.linprog-interior-point
   optimize.linprog-revised_simplex
   optimize.linprog-highs-ipm
   optimize.linprog-highs-ds
   optimize.linprog-highs

The simplex, interior-point, and revised simplex methods support callback
functions, such as:

.. autosummary::
   :toctree: generated/

   linprog_verbose_callback -- Sample callback function for linprog (simplex).
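
For instance, a minimal sketch of a two-variable LP (data invented for
illustration): maximize ``x0 + 2*x1`` subject to ``x0 + x1 <= 4`` and the
default non-negativity bounds, expressed as minimization of the negated
objective:

>>> from scipy.optimize import linprog
>>> res = linprog(c=[-1, -2], A_ub=[[1, 1]], b_ub=[4])
>>> res.x
array([0., 4.])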

Assignment problems
===================

.. autosummary::
   :toctree: generated/

   linear_sum_assignment -- Solves the linear-sum assignment problem.
   quadratic_assignment -- Solves the quadratic assignment problem.
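
For example (a minimal sketch with an invented 3x3 cost matrix),
`linear_sum_assignment` returns the column chosen for each row, and the
optimal total cost follows by fancy indexing:

>>> import numpy as np
>>> from scipy.optimize import linear_sum_assignment
>>> cost = np.array([[4, 1, 3], [2, 0, 5], [3, 2, 2]])
>>> row_ind, col_ind = linear_sum_assignment(cost)
>>> col_ind
array([1, 0, 2])
>>> int(cost[row_ind, col_ind].sum())
5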

The `quadratic_assignment` function supports the following methods:

.. toctree::

   optimize.qap-faq
   optimize.qap-2opt

Utilities
=========

Finite-difference approximation
-------------------------------

.. autosummary::
   :toctree: generated/

   approx_fprime - Approximate the gradient of a scalar function.
   check_grad - Check the supplied derivative using finite differences.
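
A minimal sketch (function and evaluation point invented for illustration):
`check_grad` returns the norm of the difference between the supplied
gradient and a finite-difference estimate, so a correct gradient yields a
value near machine precision:

>>> import numpy as np
>>> from scipy.optimize import check_grad
>>> func = lambda x: x[0]**2 + x[1]**2
>>> grad = lambda x: np.array([2 * x[0], 2 * x[1]])
>>> bool(check_grad(func, grad, np.array([1.5, -1.5])) < 1e-6)
True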


Line search
-----------

.. autosummary::
   :toctree: generated/

   bracket - Bracket a minimum, given two starting points.
   line_search - Return a step that satisfies the strong Wolfe conditions.

Hessian approximation
---------------------

.. autosummary::
   :toctree: generated/

   LbfgsInvHessProduct - Linear operator for the L-BFGS approximate inverse Hessian.
   HessianUpdateStrategy - Interface for implementing Hessian update strategies.

Benchmark problems
------------------

.. autosummary::
   :toctree: generated/

   rosen - The Rosenbrock function.
   rosen_der - The derivative of the Rosenbrock function.
   rosen_hess - The Hessian matrix of the Rosenbrock function.
   rosen_hess_prod - Product of the Rosenbrock Hessian with a vector.
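
These make handy smoke tests; for example (a minimal sketch), the Rosenbrock
function has its global minimum at the all-ones vector, where both the
function and its gradient vanish:

>>> import numpy as np
>>> from scipy.optimize import rosen, rosen_der
>>> x = np.ones(5)
>>> float(rosen(x)), rosen_der(x)
(0.0, array([0., 0., 0., 0., 0.]))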

Legacy functions
================

The functions below are not recommended for use in new scripts;
all of these methods are accessible via the newer, more consistent
interfaces provided above.

Optimization
------------

General-purpose multivariate methods:

.. autosummary::
   :toctree: generated/

   fmin - Nelder-Mead Simplex algorithm.
   fmin_powell - Powell's (modified) conjugate direction method.
   fmin_cg - Non-linear (Polak-Ribiere) conjugate gradient algorithm.
   fmin_bfgs - Quasi-Newton method (Broyden-Fletcher-Goldfarb-Shanno).
   fmin_ncg - Line-search Newton Conjugate Gradient.

Constrained multivariate methods:

.. autosummary::
   :toctree: generated/

   fmin_l_bfgs_b - Zhu, Byrd, and Nocedal's constrained optimizer.
   fmin_tnc - Truncated Newton code.
   fmin_cobyla - Constrained optimization by linear approximation.
   fmin_slsqp - Minimization using sequential least-squares programming.

Univariate (scalar) minimization methods:

.. autosummary::
   :toctree: generated/

   fminbound - Bounded minimization of a scalar function.
   brent - 1-D function minimization using Brent's method.
   golden - 1-D function minimization using the golden-section method.

Least-squares
-------------

.. autosummary::
   :toctree: generated/

   leastsq - Minimize the sum of squares of M equations in N unknowns.

Root finding
------------

General nonlinear solvers:

.. autosummary::
   :toctree: generated/

   fsolve - Non-linear multivariable equation solver.
   broyden1 - Broyden's first method.
   broyden2 - Broyden's second method.
   NoConvergence - Exception raised when a nonlinear solver does not converge.

Large-scale nonlinear solvers:

.. autosummary::
   :toctree: generated/

   newton_krylov
   anderson

   BroydenFirst
   InverseJacobian
   KrylovJacobian

Simple iteration solvers:

.. autosummary::
   :toctree: generated/

   excitingmixing
   linearmixing
   diagbroyden

"""  # noqa: E501

from ._optimize import *
from ._minimize import *
from ._root import *
from ._root_scalar import *
from ._minpack_py import *
from ._zeros_py import *
from ._lbfgsb_py import fmin_l_bfgs_b, LbfgsInvHessProduct
from ._tnc import fmin_tnc
from ._cobyla_py import fmin_cobyla
from ._nonlin import *
from ._slsqp_py import fmin_slsqp
from ._nnls import nnls
from ._basinhopping import basinhopping
from ._linprog import linprog, linprog_verbose_callback
from ._lsap import linear_sum_assignment
from ._differentialevolution import differential_evolution
from ._lsq import least_squares, lsq_linear
from ._isotonic import isotonic_regression
from ._constraints import (NonlinearConstraint,
                           LinearConstraint,
                           Bounds)
from ._hessian_update_strategy import HessianUpdateStrategy, BFGS, SR1
from ._shgo import shgo
from ._dual_annealing import dual_annealing
from ._qap import quadratic_assignment
from ._direct_py import direct
from ._milp import milp

# Deprecated namespaces, to be removed in v2.0.0
from . import (
    cobyla, lbfgsb, linesearch, minpack, minpack2, moduleTNC, nonlin, optimize,
    slsqp, tnc, zeros
)

__all__ = [s for s in dir() if not s.startswith('_')]

from scipy._lib._testutils import PytestTester
test = PytestTester(__name__)
del PytestTester
Binary file not shown.
venv/lib/python3.13/site-packages/scipy/optimize/_basinhopping.py Normal file

@@ -0,0 +1,741 @@
"""
|
||||
basinhopping: The basinhopping global optimization algorithm
|
||||
"""
|
||||
import numpy as np
|
||||
import math
|
||||
import inspect
|
||||
import scipy.optimize
|
||||
from scipy._lib._util import check_random_state, _transition_to_rng
|
||||
|
||||
__all__ = ['basinhopping']
|
||||
|
||||
|
||||
_params = (inspect.Parameter('res_new', kind=inspect.Parameter.KEYWORD_ONLY),
|
||||
inspect.Parameter('res_old', kind=inspect.Parameter.KEYWORD_ONLY))
|
||||
_new_accept_test_signature = inspect.Signature(parameters=_params)
|
||||
|
||||
|
||||
class Storage:
|
||||
"""
|
||||
Class used to store the lowest energy structure
|
||||
"""
|
||||
def __init__(self, minres):
|
||||
self._add(minres)
|
||||
|
||||
def _add(self, minres):
|
||||
self.minres = minres
|
||||
self.minres.x = np.copy(minres.x)
|
||||
|
||||
def update(self, minres):
|
||||
if minres.success and (minres.fun < self.minres.fun
|
||||
or not self.minres.success):
|
||||
self._add(minres)
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def get_lowest(self):
|
||||
return self.minres
|
||||
|
||||
|
||||
class BasinHoppingRunner:
|
||||
"""This class implements the core of the basinhopping algorithm.
|
||||
|
||||
x0 : ndarray
|
||||
The starting coordinates.
|
||||
minimizer : callable
|
||||
The local minimizer, with signature ``result = minimizer(x)``.
|
||||
The return value is an `optimize.OptimizeResult` object.
|
||||
step_taking : callable
|
||||
This function displaces the coordinates randomly. Signature should
|
||||
be ``x_new = step_taking(x)``. Note that `x` may be modified in-place.
|
||||
accept_tests : list of callables
|
||||
Each test is passed the kwargs `f_new`, `x_new`, `f_old` and
|
||||
`x_old`. These tests will be used to judge whether or not to accept
|
||||
the step. The acceptable return values are True, False, or ``"force
|
||||
accept"``. If any of the tests return False then the step is rejected.
|
||||
If ``"force accept"``, then this will override any other tests in
|
||||
order to accept the step. This can be used, for example, to forcefully
|
||||
escape from a local minimum that ``basinhopping`` is trapped in.
|
||||
disp : bool, optional
|
||||
Display status messages.
|
||||
|
||||
"""
|
||||
def __init__(self, x0, minimizer, step_taking, accept_tests, disp=False):
|
||||
self.x = np.copy(x0)
|
||||
self.minimizer = minimizer
|
||||
self.step_taking = step_taking
|
||||
self.accept_tests = accept_tests
|
||||
self.disp = disp
|
||||
|
||||
self.nstep = 0
|
||||
|
||||
# initialize return object
|
||||
self.res = scipy.optimize.OptimizeResult()
|
||||
self.res.minimization_failures = 0
|
||||
|
||||
# do initial minimization
|
||||
minres = minimizer(self.x)
|
||||
if not minres.success:
|
||||
self.res.minimization_failures += 1
|
||||
if self.disp:
|
||||
print("warning: basinhopping: local minimization failure")
|
||||
self.x = np.copy(minres.x)
|
||||
self.energy = minres.fun
|
||||
self.incumbent_minres = minres # best minimize result found so far
|
||||
if self.disp:
|
||||
print(f"basinhopping step {self.nstep}: f {self.energy:g}")
|
||||
|
||||
# initialize storage class
|
||||
self.storage = Storage(minres)
|
||||
|
||||
if hasattr(minres, "nfev"):
|
||||
self.res.nfev = minres.nfev
|
||||
if hasattr(minres, "njev"):
|
||||
self.res.njev = minres.njev
|
||||
if hasattr(minres, "nhev"):
|
||||
self.res.nhev = minres.nhev
|
||||
|
||||
def _monte_carlo_step(self):
|
||||
"""Do one Monte Carlo iteration
|
||||
|
||||
Randomly displace the coordinates, minimize, and decide whether
|
||||
or not to accept the new coordinates.
|
||||
"""
|
||||
# Take a random step. Make a copy of x because the step_taking
|
||||
# algorithm might change x in place
|
||||
x_after_step = np.copy(self.x)
|
||||
x_after_step = self.step_taking(x_after_step)
|
||||
|
||||
# do a local minimization
|
||||
minres = self.minimizer(x_after_step)
|
||||
x_after_quench = minres.x
|
||||
energy_after_quench = minres.fun
|
||||
if not minres.success:
|
||||
self.res.minimization_failures += 1
|
||||
if self.disp:
|
||||
print("warning: basinhopping: local minimization failure")
|
||||
if hasattr(minres, "nfev"):
|
||||
self.res.nfev += minres.nfev
|
||||
if hasattr(minres, "njev"):
|
||||
self.res.njev += minres.njev
|
||||
if hasattr(minres, "nhev"):
|
||||
self.res.nhev += minres.nhev
|
||||
|
||||
# accept the move based on self.accept_tests. If any test is False,
|
||||
# then reject the step. If any test returns the special string
|
||||
# 'force accept', then accept the step regardless. This can be used
|
||||
# to forcefully escape from a local minimum if normal basin hopping
|
||||
# steps are not sufficient.
|
||||
accept = True
|
||||
for test in self.accept_tests:
|
||||
if inspect.signature(test) == _new_accept_test_signature:
|
||||
testres = test(res_new=minres, res_old=self.incumbent_minres)
|
||||
else:
|
||||
testres = test(f_new=energy_after_quench, x_new=x_after_quench,
|
||||
f_old=self.energy, x_old=self.x)
|
||||
|
||||
if testres == 'force accept':
|
||||
accept = True
|
||||
break
|
||||
elif testres is None:
|
||||
raise ValueError("accept_tests must return True, False, or "
|
||||
"'force accept'")
|
||||
elif not testres:
|
||||
accept = False
|
||||
|
||||
# Report the result of the acceptance test to the take step class.
|
||||
# This is for adaptive step taking
|
||||
if hasattr(self.step_taking, "report"):
|
||||
self.step_taking.report(accept, f_new=energy_after_quench,
|
||||
x_new=x_after_quench, f_old=self.energy,
|
||||
x_old=self.x)
|
||||
|
||||
return accept, minres
|
||||
|
||||
def one_cycle(self):
|
||||
"""Do one cycle of the basinhopping algorithm
|
||||
"""
|
||||
self.nstep += 1
|
||||
new_global_min = False
|
||||
|
||||
accept, minres = self._monte_carlo_step()
|
||||
|
||||
if accept:
|
||||
self.energy = minres.fun
|
||||
self.x = np.copy(minres.x)
|
||||
self.incumbent_minres = minres # best minimize result found so far
|
||||
new_global_min = self.storage.update(minres)
|
||||
|
||||
# print some information
|
||||
if self.disp:
|
||||
self.print_report(minres.fun, accept)
|
||||
if new_global_min:
|
||||
print(
|
||||
f"found new global minimum on step {self.nstep} with "
|
||||
f"function value {self.energy:g}"
|
||||
)
|
||||
|
||||
# save some variables as BasinHoppingRunner attributes
|
||||
self.xtrial = minres.x
|
||||
self.energy_trial = minres.fun
|
||||
self.accept = accept
|
||||
|
||||
return new_global_min
|
||||
|
||||
def print_report(self, energy_trial, accept):
|
||||
"""print a status update"""
|
||||
minres = self.storage.get_lowest()
|
||||
print(
|
||||
f"basinhopping step {self.nstep}: f {self.energy:g} "
|
||||
f"trial_f {energy_trial:g} accepted {accept} "
|
||||
f"lowest_f {minres.fun:g}"
|
||||
)
|
||||
|
||||
|
||||
class AdaptiveStepsize:
|
||||
"""
|
||||
Class to implement adaptive stepsize.
|
||||
|
||||
This class wraps the step taking class and modifies the stepsize to
|
||||
ensure the true acceptance rate is as close as possible to the target.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
takestep : callable
|
||||
The step taking routine. Must contain modifiable attribute
|
||||
takestep.stepsize
|
||||
accept_rate : float, optional
|
||||
The target step acceptance rate
|
||||
interval : int, optional
|
||||
Interval for how often to update the stepsize
|
||||
factor : float, optional
|
||||
The step size is multiplied or divided by this factor upon each
|
||||
update.
|
||||
verbose : bool, optional
|
||||
Print information about each update
|
||||
|
||||
"""
|
||||
def __init__(self, takestep, accept_rate=0.5, interval=50, factor=0.9,
|
||||
verbose=True):
|
||||
self.takestep = takestep
|
||||
self.target_accept_rate = accept_rate
|
||||
self.interval = interval
|
||||
self.factor = factor
|
||||
self.verbose = verbose
|
||||
|
||||
self.nstep = 0
|
||||
self.nstep_tot = 0
|
||||
self.naccept = 0
|
||||
|
||||
def __call__(self, x):
|
||||
return self.take_step(x)
|
||||
|
||||
def _adjust_step_size(self):
|
||||
old_stepsize = self.takestep.stepsize
|
||||
accept_rate = float(self.naccept) / self.nstep
|
||||
if accept_rate > self.target_accept_rate:
|
||||
# We're accepting too many steps. This generally means we're
|
||||
# trapped in a basin. Take bigger steps.
|
||||
self.takestep.stepsize /= self.factor
|
||||
else:
|
||||
# We're not accepting enough steps. Take smaller steps.
|
||||
self.takestep.stepsize *= self.factor
|
||||
if self.verbose:
|
||||
print(f"adaptive stepsize: acceptance rate {accept_rate:f} target "
|
||||
f"{self.target_accept_rate:f} new stepsize "
|
||||
f"{self.takestep.stepsize:g} old stepsize {old_stepsize:g}")
|
||||
|
||||
def take_step(self, x):
|
||||
self.nstep += 1
|
||||
self.nstep_tot += 1
|
||||
if self.nstep % self.interval == 0:
|
||||
self._adjust_step_size()
|
||||
return self.takestep(x)
|
||||
|
||||
def report(self, accept, **kwargs):
|
||||
"called by basinhopping to report the result of the step"
|
||||
if accept:
|
||||
self.naccept += 1
|
||||
|
||||
|
||||
class RandomDisplacement:
|
||||
"""Add a random displacement of maximum size `stepsize` to each coordinate.
|
||||
|
||||
Calling this updates `x` in-place.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
stepsize : float, optional
|
||||
Maximum stepsize in any dimension
|
||||
rng : {None, int, `numpy.random.Generator`}, optional
|
||||
Random number generator
|
||||
"""
|
||||
|
||||
def __init__(self, stepsize=0.5, rng=None):
|
||||
self.stepsize = stepsize
|
||||
self.rng = check_random_state(rng)
|
||||
|
||||
def __call__(self, x):
|
||||
x += self.rng.uniform(-self.stepsize, self.stepsize,
|
||||
np.shape(x))
|
||||
return x
|
||||
|
||||
|
||||
class MinimizerWrapper:
|
||||
"""
|
||||
wrap a minimizer function as a minimizer class
|
||||
"""
|
||||
def __init__(self, minimizer, func=None, **kwargs):
|
||||
self.minimizer = minimizer
|
||||
self.func = func
|
||||
self.kwargs = kwargs
|
||||
|
||||
def __call__(self, x0):
|
||||
if self.func is None:
|
||||
return self.minimizer(x0, **self.kwargs)
|
||||
else:
|
||||
return self.minimizer(self.func, x0, **self.kwargs)
|
||||
|
||||
|
||||
class Metropolis:
|
||||
"""Metropolis acceptance criterion.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
T : float
|
||||
The "temperature" parameter for the accept or reject criterion.
|
||||
rng : {None, int, `numpy.random.Generator`}, optional
|
||||
Random number generator used for acceptance test.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, T, rng=None):
|
||||
# Avoid ZeroDivisionError since "MBH can be regarded as a special case
|
||||
# of the BH framework with the Metropolis criterion, where temperature
|
||||
# T = 0." (Reject all steps that increase energy.)
|
||||
self.beta = 1.0 / T if T != 0 else float('inf')
|
||||
self.rng = check_random_state(rng)
|
||||
|
||||
def accept_reject(self, res_new, res_old):
|
||||
"""
|
||||
Assuming the local search underlying res_new was successful:
|
||||
If new energy is lower than old, it will always be accepted.
|
||||
If new is higher than old, there is a chance it will be accepted,
|
||||
less likely for larger differences.
|
||||
"""
|
||||
with np.errstate(invalid='ignore'):
|
||||
# The energy values being fed to Metropolis are 1-length arrays, and if
|
||||
# they are equal, their difference is 0, which gets multiplied by beta,
|
||||
# which is inf, and array([0]) * float('inf') causes
|
||||
#
|
||||
# RuntimeWarning: invalid value encountered in multiply
|
||||
#
|
||||
# Ignore this warning so when the algorithm is on a flat plane, it always
|
||||
# accepts the step, to try to move off the plane.
|
||||
prod = -(res_new.fun - res_old.fun) * self.beta
|
||||
w = math.exp(min(0, prod))
|
||||
|
||||
rand = self.rng.uniform()
|
||||
return w >= rand and (res_new.success or not res_old.success)
|
||||
|
||||
def __call__(self, *, res_new, res_old):
|
||||
"""
|
||||
f_new and f_old are mandatory in kwargs
|
||||
"""
|
||||
return bool(self.accept_reject(res_new, res_old))
|
||||
|
||||
|
||||
@_transition_to_rng("seed", position_num=12, replace_doc=True)
|
||||
def basinhopping(func, x0, niter=100, T=1.0, stepsize=0.5,
|
||||
minimizer_kwargs=None, take_step=None, accept_test=None,
|
||||
callback=None, interval=50, disp=False, niter_success=None,
|
||||
rng=None, *, target_accept_rate=0.5, stepwise_factor=0.9):
|
||||
"""Find the global minimum of a function using the basin-hopping algorithm.
|
||||
|
||||
Basin-hopping is a two-phase method that combines a global stepping
|
||||
algorithm with local minimization at each step. Designed to mimic
|
||||
the natural process of energy minimization of clusters of atoms, it works
|
||||
well for similar problems with "funnel-like, but rugged" energy landscapes
|
||||
[5]_.
|
||||
|
||||
As the step-taking, step acceptance, and minimization methods are all
|
||||
customizable, this function can also be used to implement other two-phase
|
||||
methods.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
func : callable ``f(x, *args)``
|
||||
Function to be optimized. ``args`` can be passed as an optional item
|
||||
in the dict `minimizer_kwargs`
|
||||
x0 : array_like
|
||||
Initial guess.
|
||||
niter : integer, optional
|
||||
The number of basin-hopping iterations. There will be a total of
|
||||
``niter + 1`` runs of the local minimizer.
|
||||
T : float, optional
|
||||
The "temperature" parameter for the acceptance or rejection criterion.
|
||||
Higher "temperatures" mean that larger jumps in function value will be
|
||||
accepted. For best results `T` should be comparable to the
|
||||
separation (in function value) between local minima.
|
||||
stepsize : float, optional
|
||||
Maximum step size for use in the random displacement.
|
||||
minimizer_kwargs : dict, optional
|
||||
Extra keyword arguments to be passed to the local minimizer
|
||||
`scipy.optimize.minimize` Some important options could be:
|
||||
|
||||
method : str
|
||||
The minimization method (e.g. ``"L-BFGS-B"``)
|
||||
args : tuple
|
||||
Extra arguments passed to the objective function (`func`) and
|
||||
its derivatives (Jacobian, Hessian).
|
||||
|
||||
take_step : callable ``take_step(x)``, optional
|
||||
Replace the default step-taking routine with this routine. The default
|
||||
step-taking routine is a random displacement of the coordinates, but
|
||||
other step-taking algorithms may be better for some systems.
|
||||
`take_step` can optionally have the attribute ``take_step.stepsize``.
|
||||
If this attribute exists, then `basinhopping` will adjust
|
||||
``take_step.stepsize`` in order to try to optimize the global minimum
|
||||
search.
|
||||
accept_test : callable, ``accept_test(f_new=f_new, x_new=x_new, f_old=fold, x_old=x_old)``, optional
|
||||
Define a test which will be used to judge whether to accept the
|
||||
step. This will be used in addition to the Metropolis test based on
|
||||
"temperature" `T`. The acceptable return values are True,
|
||||
False, or ``"force accept"``. If any of the tests return False
|
||||
then the step is rejected. If the latter, then this will override any
|
||||
other tests in order to accept the step. This can be used, for example,
|
||||
to forcefully escape from a local minimum that `basinhopping` is
|
||||
trapped in.
|
||||
callback : callable, ``callback(x, f, accept)``, optional
|
||||
A callback function which will be called for all minima found. ``x``
|
||||
and ``f`` are the coordinates and function value of the trial minimum,
|
||||
and ``accept`` is whether that minimum was accepted. This can
|
||||
be used, for example, to save the lowest N minima found. Also,
|
||||
`callback` can be used to specify a user defined stop criterion by
|
||||
optionally returning True to stop the `basinhopping` routine.
|
||||
interval : integer, optional
|
||||
interval for how often to update the `stepsize`
|
||||
disp : bool, optional
|
||||
Set to True to print status messages
|
||||
niter_success : integer, optional
|
||||
Stop the run if the global minimum candidate remains the same for this
|
||||
number of iterations.
|
||||
rng : `numpy.random.Generator`, optional
|
||||
Pseudorandom number generator state. When `rng` is None, a new
|
||||
`numpy.random.Generator` is created using entropy from the
|
||||
operating system. Types other than `numpy.random.Generator` are
|
||||
passed to `numpy.random.default_rng` to instantiate a ``Generator``.
|
||||
|
||||
The random numbers generated only affect the default Metropolis
|
||||
`accept_test` and the default `take_step`. If you supply your own
|
||||
`take_step` and `accept_test`, and these functions use random
|
||||
number generation, then those functions are responsible for the state
|
||||
of their random number generator.
|
||||
target_accept_rate : float, optional
|
||||
The target acceptance rate that is used to adjust the `stepsize`.
|
||||
If the current acceptance rate is greater than the target,
|
||||
then the `stepsize` is increased. Otherwise, it is decreased.
|
||||
Range is (0, 1). Default is 0.5.
|
||||
|
||||
.. versionadded:: 1.8.0
|
||||
|
||||
stepwise_factor : float, optional
|
||||
The `stepsize` is multiplied or divided by this stepwise factor upon
|
||||
each update. Range is (0, 1). Default is 0.9.
|
||||
|
||||
.. versionadded:: 1.8.0
|
||||
|
||||
Returns
|
||||
-------
|
||||
res : OptimizeResult
|
||||
The optimization result represented as a `OptimizeResult` object.
|
||||
Important attributes are: ``x`` the solution array, ``fun`` the value
|
||||
of the function at the solution, and ``message`` which describes the
|
||||
cause of the termination. The ``OptimizeResult`` object returned by the
|
||||
selected minimizer at the lowest minimum is also contained within this
|
||||
object and can be accessed through the ``lowest_optimization_result``
|
||||
attribute. ``lowest_optimization_result`` will only be updated if a
|
||||
local minimization was successful.
|
||||
See `OptimizeResult` for a description of other attributes.
|
||||
|
||||
See Also
|
||||
--------
|
||||
minimize :
|
||||
The local minimization function called once for each basinhopping step.
|
||||
`minimizer_kwargs` is passed to this routine.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Basin-hopping is a stochastic algorithm which attempts to find the global
|
||||
minimum of a smooth scalar function of one or more variables [1]_ [2]_ [3]_
|
||||
[4]_. The algorithm in its current form was described by David Wales and
|
||||
Jonathan Doye [2]_ http://www-wales.ch.cam.ac.uk/.
|
||||
|
||||
The algorithm is iterative with each cycle composed of the following
|
||||
features
|
||||
|
||||
1) random perturbation of the coordinates
|
||||
|
||||
2) local minimization
|
||||
|
||||
3) accept or reject the new coordinates based on the minimized function
|
||||
value
|
||||
|
||||
The acceptance test used here is the Metropolis criterion of standard Monte
|
||||
Carlo algorithms, although there are many other possibilities [3]_.
|
||||
|
||||
This global minimization method has been shown to be extremely efficient
|
||||
for a wide variety of problems in physics and chemistry. It is
|
||||
particularly useful when the function has many minima separated by large
|
||||
barriers. See the `Cambridge Cluster Database
|
||||
<https://www-wales.ch.cam.ac.uk/CCD.html>`_ for databases of molecular
|
||||
systems that have been optimized primarily using basin-hopping. This
|
||||
database includes minimization problems exceeding 300 degrees of freedom.
|
||||
|
||||
See the free software program `GMIN <https://www-wales.ch.cam.ac.uk/GMIN>`_
|
||||
for a Fortran implementation of basin-hopping. This implementation has many
|
||||
variations of the procedure described above, including more
|
||||
advanced step taking algorithms and alternate acceptance criterion.
|
||||
|
||||
For stochastic global optimization there is no way to determine if the true
|
||||
global minimum has actually been found. Instead, as a consistency check,
|
||||
the algorithm can be run from a number of different random starting points
|
||||
to ensure the lowest minimum found in each example has converged to the
|
||||
global minimum. For this reason, `basinhopping` will by default simply
|
||||
run for the number of iterations `niter` and return the lowest minimum
|
||||
found. It is left to the user to ensure that this is in fact the global
|
||||
minimum.
|
||||
|
||||
Choosing `stepsize`: This is a crucial parameter in `basinhopping` and
|
||||
depends on the problem being solved. The step is chosen uniformly in the
|
||||
region from x0-stepsize to x0+stepsize, in each dimension. Ideally, it
|
||||
should be comparable to the typical separation (in argument values) between
|
||||
local minima of the function being optimized. `basinhopping` will, by
|
||||
default, adjust `stepsize` to find an optimal value, but this may take
|
||||
many iterations. You will get quicker results if you set a sensible
|
||||
initial value for ``stepsize``.
|
||||
|
||||
Choosing `T`: The parameter `T` is the "temperature" used in the
|
||||
Metropolis criterion. Basinhopping steps are always accepted if
|
||||
``func(xnew) < func(xold)``. Otherwise, they are accepted with
|
||||
probability::
|
||||
|
||||
exp( -(func(xnew) - func(xold)) / T )
|
||||
|
||||
So, for best results, `T` should to be comparable to the typical
|
||||
difference (in function values) between local minima. (The height of
|
||||
"walls" between local minima is irrelevant.)
|
||||
|
||||
If `T` is 0, the algorithm becomes Monotonic Basin-Hopping, in which all
|
||||
steps that increase energy are rejected.
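
    As a quick numerical illustration of the criterion above (a sketch; the
    numbers are arbitrary), an uphill step of 1.0 at ``T = 1.0`` is accepted
    roughly a third of the time:

    >>> import math
    >>> math.exp(-1.0 / 1.0)  # P(accept) when func(xnew) - func(xold) == 1
    0.36787944117144233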

    .. versionadded:: 0.12.0

    References
    ----------
    .. [1] Wales, David J. 2003, Energy Landscapes, Cambridge University Press,
        Cambridge, UK.
    .. [2] Wales, D J, and Doye J P K, Global Optimization by Basin-Hopping and
        the Lowest Energy Structures of Lennard-Jones Clusters Containing up to
        110 Atoms. Journal of Physical Chemistry A, 1997, 101, 5111.
    .. [3] Li, Z. and Scheraga, H. A., Monte Carlo-minimization approach to the
        multiple-minima problem in protein folding, Proc. Natl. Acad. Sci. USA,
        1987, 84, 6611.
    .. [4] Wales, D. J. and Scheraga, H. A., Global optimization of clusters,
        crystals, and biomolecules, Science, 1999, 285, 1368.
    .. [5] Olson, B., Hashmi, I., Molloy, K., and Shehu, A., Basin Hopping as
        a General and Versatile Optimization Framework for the Characterization
        of Biological Macromolecules, Advances in Artificial Intelligence,
        Volume 2012 (2012), Article ID 674832, :doi:`10.1155/2012/674832`

    Examples
    --------
    The following example is a 1-D minimization problem, with many
    local minima superimposed on a parabola.

    >>> import numpy as np
    >>> from scipy.optimize import basinhopping
    >>> func = lambda x: np.cos(14.5 * x - 0.3) + (x + 0.2) * x
    >>> x0 = [1.]

    Basinhopping, internally, uses a local minimization algorithm. We will use
    the parameter `minimizer_kwargs` to tell basinhopping which algorithm to
    use and how to set up that minimizer. This parameter will be passed to
    `scipy.optimize.minimize`.

    >>> minimizer_kwargs = {"method": "BFGS"}
    >>> ret = basinhopping(func, x0, minimizer_kwargs=minimizer_kwargs,
    ...                    niter=200)
    >>> # the global minimum is:
    >>> ret.x, ret.fun
    -0.1951, -1.0009

    Next consider a 2-D minimization problem. Also, this time, we
    will use gradient information to significantly speed up the search.

    >>> def func2d(x):
    ...     f = np.cos(14.5 * x[0] - 0.3) + (x[1] + 0.2) * x[1] + (x[0] +
    ...                                                            0.2) * x[0]
    ...     df = np.zeros(2)
    ...     df[0] = -14.5 * np.sin(14.5 * x[0] - 0.3) + 2. * x[0] + 0.2
    ...     df[1] = 2. * x[1] + 0.2
    ...     return f, df

    We'll also use a different local minimization algorithm. Also, we must tell
    the minimizer that our function returns both energy and gradient (Jacobian).

    >>> minimizer_kwargs = {"method":"L-BFGS-B", "jac":True}
    >>> x0 = [1.0, 1.0]
    >>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
    ...                    niter=200)
    >>> print("global minimum: x = [%.4f, %.4f], f(x) = %.4f" % (ret.x[0],
    ...                                                           ret.x[1],
    ...                                                           ret.fun))
    global minimum: x = [-0.1951, -0.1000], f(x) = -1.0109

    Here is an example using a custom step-taking routine. Imagine you want
    the first coordinate to take larger steps than the rest of the coordinates.
    This can be implemented like so:

    >>> class MyTakeStep:
    ...     def __init__(self, stepsize=0.5):
    ...         self.stepsize = stepsize
    ...         self.rng = np.random.default_rng()
    ...     def __call__(self, x):
    ...         s = self.stepsize
    ...         x[0] += self.rng.uniform(-2.*s, 2.*s)
    ...         x[1:] += self.rng.uniform(-s, s, x[1:].shape)
    ...         return x

    Since ``MyTakeStep.stepsize`` exists, basinhopping will adjust the magnitude
    of `stepsize` to optimize the search. We'll use the same 2-D function as
    before.

    >>> mytakestep = MyTakeStep()
    >>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
    ...                    niter=200, take_step=mytakestep)
    >>> print("global minimum: x = [%.4f, %.4f], f(x) = %.4f" % (ret.x[0],
    ...                                                           ret.x[1],
    ...                                                           ret.fun))
    global minimum: x = [-0.1951, -0.1000], f(x) = -1.0109

    Now, let's do an example using a custom callback function which prints the
    value of every minimum found.

    >>> def print_fun(x, f, accepted):
    ...     print("at minimum %.4f accepted %d" % (f, int(accepted)))

    We'll run it for only 10 basinhopping steps this time.

    >>> rng = np.random.default_rng()
    >>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
    ...                    niter=10, callback=print_fun, rng=rng)
    at minimum 0.4159 accepted 1
    at minimum -0.4317 accepted 1
    at minimum -1.0109 accepted 1
    at minimum -0.9073 accepted 1
    at minimum -0.4317 accepted 0
    at minimum -0.1021 accepted 1
    at minimum -0.7425 accepted 1
    at minimum -0.9073 accepted 1
    at minimum -0.4317 accepted 0
    at minimum -0.7425 accepted 1
    at minimum -0.9073 accepted 1

    The minimum at -1.0109 is actually the global minimum, found already on the
    8th iteration.

    """  # numpy/numpydoc#87  # noqa: E501
    if target_accept_rate <= 0. or target_accept_rate >= 1.:
        raise ValueError('target_accept_rate has to be in range (0, 1)')
    if stepwise_factor <= 0. or stepwise_factor >= 1.:
        raise ValueError('stepwise_factor has to be in range (0, 1)')

    x0 = np.array(x0)

    # set up the np.random generator
    rng = check_random_state(rng)

    # set up minimizer
    if minimizer_kwargs is None:
        minimizer_kwargs = dict()
    wrapped_minimizer = MinimizerWrapper(scipy.optimize.minimize, func,
                                         **minimizer_kwargs)

    # set up step-taking algorithm
    if take_step is not None:
        if not callable(take_step):
            raise TypeError("take_step must be callable")
        # if take_step.stepsize exists then use AdaptiveStepsize to control
        # take_step.stepsize
        if hasattr(take_step, "stepsize"):
            take_step_wrapped = AdaptiveStepsize(
                take_step, interval=interval,
                accept_rate=target_accept_rate,
                factor=stepwise_factor,
                verbose=disp)
        else:
            take_step_wrapped = take_step
    else:
        # use default
        displace = RandomDisplacement(stepsize=stepsize, rng=rng)
        take_step_wrapped = AdaptiveStepsize(displace, interval=interval,
                                             accept_rate=target_accept_rate,
                                             factor=stepwise_factor,
                                             verbose=disp)

    # set up accept tests
    accept_tests = []
    if accept_test is not None:
        if not callable(accept_test):
            raise TypeError("accept_test must be callable")
        accept_tests = [accept_test]

    # use default
    metropolis = Metropolis(T, rng=rng)
    accept_tests.append(metropolis)

    if niter_success is None:
        niter_success = niter + 2

    bh = BasinHoppingRunner(x0, wrapped_minimizer, take_step_wrapped,
                            accept_tests, disp=disp)

    # The wrapped minimizer is called once during construction of
    # BasinHoppingRunner, so run the callback
    if callable(callback):
        callback(bh.storage.minres.x, bh.storage.minres.fun, True)

    # start main iteration loop
    count, i = 0, 0
    message = ["requested number of basinhopping iterations completed"
               " successfully"]
    for i in range(niter):
        new_global_min = bh.one_cycle()

        if callable(callback):
            # should we pass a copy of x?
            val = callback(bh.xtrial, bh.energy_trial, bh.accept)
            if val is not None:
                if val:
                    message = ["callback function requested stop early by"
                               " returning True"]
                    break

        count += 1
        if new_global_min:
            count = 0
        elif count > niter_success:
            message = ["success condition satisfied"]
            break

    # prepare return object
    res = bh.res
    res.lowest_optimization_result = bh.storage.get_lowest()
    res.x = np.copy(res.lowest_optimization_result.x)
    res.fun = res.lowest_optimization_result.fun
    res.message = message
    res.nit = i + 1
    res.success = res.lowest_optimization_result.success
    return res
Binary file not shown.

706 venv/lib/python3.13/site-packages/scipy/optimize/_bracket.py Normal file

@@ -0,0 +1,706 @@
import numpy as np
import scipy._lib._elementwise_iterative_method as eim
from scipy._lib._util import _RichResult
from scipy._lib._array_api import array_namespace, xp_ravel, xp_promote

_ELIMITS = -1  # used in _bracket_root
_ESTOPONESIDE = 2  # used in _bracket_root


def _bracket_root_iv(func, xl0, xr0, xmin, xmax, factor, args, maxiter):

    if not callable(func):
        raise ValueError('`func` must be callable.')

    if not np.iterable(args):
        args = (args,)

    xp = array_namespace(xl0, xr0, xmin, xmax, factor, *args)

    # If xr0 is not supplied, fill with a dummy value for the sake of
    # broadcasting. We need to wait until xmax has been validated to
    # compute the default value.
    xr0_not_supplied = False
    if xr0 is None:
        xr0 = xp.nan
        xr0_not_supplied = True

    xmin = -xp.inf if xmin is None else xmin
    xmax = xp.inf if xmax is None else xmax
    factor = 2. if factor is None else factor
    xl0, xr0, xmin, xmax, factor = xp_promote(
        xl0, xr0, xmin, xmax, factor, broadcast=True, force_floating=True, xp=xp)

    if not xp.isdtype(xl0.dtype, ('integral', 'real floating')):
        raise ValueError('`xl0` must be numeric and real.')

    if (not xp.isdtype(xr0.dtype, "numeric")
            or xp.isdtype(xr0.dtype, "complex floating")):
        raise ValueError('`xr0` must be numeric and real.')

    if (not xp.isdtype(xmin.dtype, "numeric")
            or xp.isdtype(xmin.dtype, "complex floating")):
        raise ValueError('`xmin` must be numeric and real.')

    if (not xp.isdtype(xmax.dtype, "numeric")
            or xp.isdtype(xmax.dtype, "complex floating")):
        raise ValueError('`xmax` must be numeric and real.')

    if (not xp.isdtype(factor.dtype, "numeric")
            or xp.isdtype(factor.dtype, "complex floating")):
        raise ValueError('`factor` must be numeric and real.')
    if not xp.all(factor > 1):
        raise ValueError('All elements of `factor` must be greater than 1.')

    # Calculate the default value of xr0 if a value has not been supplied.
    # Be careful to ensure xr0 is not larger than xmax.
    if xr0_not_supplied:
        xr0 = xl0 + xp.minimum((xmax - xl0) / 8, 1.0)
        xr0 = xp.astype(xr0, xl0.dtype, copy=False)

    maxiter = xp.asarray(maxiter)
    message = '`maxiter` must be a non-negative integer.'
    if (not xp.isdtype(maxiter.dtype, "numeric") or maxiter.shape != tuple()
            or xp.isdtype(maxiter.dtype, "complex floating")):
        raise ValueError(message)
    maxiter_int = int(maxiter[()])
    if not maxiter == maxiter_int or maxiter < 0:
        raise ValueError(message)

    return func, xl0, xr0, xmin, xmax, factor, args, maxiter, xp


def _bracket_root(func, xl0, xr0=None, *, xmin=None, xmax=None, factor=None,
                  args=(), maxiter=1000):
    """Bracket the root of a monotonic scalar function of one variable

    This function works elementwise when `xl0`, `xr0`, `xmin`, `xmax`, `factor`, and
    the elements of `args` are broadcastable arrays.

    Parameters
    ----------
    func : callable
        The function for which the root is to be bracketed.
        The signature must be::

            func(x: ndarray, *args) -> ndarray

        where each element of ``x`` is a finite real and ``args`` is a tuple,
        which may contain an arbitrary number of arrays that are broadcastable
        with `x`. ``func`` must be an elementwise function: each element
        ``func(x)[i]`` must equal ``func(x[i])`` for all indices ``i``.
    xl0, xr0 : float array_like
        Starting guess of bracket, which need not contain a root. If `xr0` is
        not provided, a default slightly to the right of `xl0` (by at most 1,
        and no greater than `xmax`) is used. Must be broadcastable with one
        another.
    xmin, xmax : float array_like, optional
        Minimum and maximum allowable endpoints of the bracket, inclusive. Must
        be broadcastable with `xl0` and `xr0`.
    factor : float array_like, default: 2
        The factor used to grow the bracket. See notes for details.
    args : tuple, optional
        Additional positional arguments to be passed to `func`. Must be arrays
        broadcastable with `xl0`, `xr0`, `xmin`, and `xmax`. If the callable to be
        bracketed requires arguments that are not broadcastable with these
        arrays, wrap that callable with `func` such that `func` accepts
        only `x` and broadcastable arrays.
    maxiter : int, optional
        The maximum number of iterations of the algorithm to perform.

    Returns
    -------
    res : _RichResult
        An instance of `scipy._lib._util._RichResult` with the following
        attributes. The descriptions are written as though the values will be
        scalars; however, if `func` returns an array, the outputs will be
        arrays of the same shape.

        xl, xr : float
            The lower and upper ends of the bracket, if the algorithm
            terminated successfully.
        fl, fr : float
            The function value at the lower and upper ends of the bracket.
        nfev : int
            The number of function evaluations required to find the bracket.
            This is distinct from the number of times `func` is *called*
            because the function may be evaluated at multiple points in a
            single call.
        nit : int
            The number of iterations of the algorithm that were performed.
        status : int
            An integer representing the exit status of the algorithm.

            - ``0`` : The algorithm produced a valid bracket.
            - ``-1`` : The bracket expanded to the allowable limits without finding a bracket.
            - ``-2`` : The maximum number of iterations was reached.
            - ``-3`` : A non-finite value was encountered.
            - ``-4`` : Iteration was terminated by `callback`.
            - ``-5`` : The initial bracket does not satisfy `xmin <= xl0 < xr0 < xmax`.
            - ``1`` : The algorithm is proceeding normally (in `callback` only).
            - ``2`` : A bracket was found in the opposite search direction (in `callback` only).

        success : bool
            ``True`` when the algorithm terminated successfully (status ``0``).

    Notes
    -----
    This function generalizes an algorithm found in pieces throughout
    `scipy.stats`. The strategy is to iteratively grow the bracket ``(l, r)``
    until ``func(l) < 0 < func(r)``. The bracket grows to the left as follows.

    - If `xmin` is not provided, the distance between `xl0` and `l` is iteratively
      increased by `factor`.
    - If `xmin` is provided, the distance between `xmin` and `l` is iteratively
      decreased by `factor`. Note that this also *increases* the bracket size.

    Growth of the bracket to the right is analogous.

    Growth of the bracket in one direction stops when the endpoint is no longer
    finite, the function value at the endpoint is no longer finite, or the
    endpoint reaches its limiting value (`xmin` or `xmax`). Iteration terminates
    when the bracket stops growing in both directions, the bracket surrounds
    the root, or a root is found (accidentally).

    If two brackets are found, that is, a bracket is found on both sides in
    the same iteration, the smaller of the two is returned.
    If roots of the function are found, both `l` and `r` are set to the
    leftmost root.
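
    A minimal sketch of typical use (this is a private helper, so the import
    path and behavior are internal and subject to change):

    >>> from scipy.optimize._bracket import _bracket_root
    >>> res = _bracket_root(lambda x: x - 5, xl0=0.0)
    >>> bool(res.success), float(res.xl) < 5 < float(res.xr)
    (True, True)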
|
||||
|
||||
""" # noqa: E501
|
||||
# Todo:
|
||||
# - find bracket with sign change in specified direction
|
||||
# - Add tolerance
|
||||
# - allow factor < 1?
|
||||
|
||||
callback = None # works; I just don't want to test it
|
||||
temp = _bracket_root_iv(func, xl0, xr0, xmin, xmax, factor, args, maxiter)
|
||||
func, xl0, xr0, xmin, xmax, factor, args, maxiter, xp = temp
|
||||
|
||||
xs = (xl0, xr0)
|
||||
temp = eim._initialize(func, xs, args)
|
||||
func, xs, fs, args, shape, dtype, xp = temp # line split for PEP8
|
||||
xl0, xr0 = xs
|
||||
xmin = xp_ravel(xp.astype(xp.broadcast_to(xmin, shape), dtype, copy=False), xp=xp)
|
||||
xmax = xp_ravel(xp.astype(xp.broadcast_to(xmax, shape), dtype, copy=False), xp=xp)
|
||||
invalid_bracket = ~((xmin <= xl0) & (xl0 < xr0) & (xr0 <= xmax))

    # The approach is to treat the left and right searches as though they were
    # (almost) totally independent one-sided bracket searches. (The interaction
    # is considered when checking for termination and preparing the result
    # object.)
    # `x` is the "moving" end of the bracket
    x = xp.concat(xs)
    f = xp.concat(fs)
    invalid_bracket = xp.concat((invalid_bracket, invalid_bracket))
    n = x.shape[0] // 2

    # `x_last` is the previous location of the moving end of the bracket. If
    # the signs of `f` and `f_last` are different, `x` and `x_last` form a
    # bracket.
    x_last = xp.concat((x[n:], x[:n]))
    f_last = xp.concat((f[n:], f[:n]))
    # `x0` is the "fixed" end of the bracket.
    x0 = x_last
    # We don't need to retain the corresponding function value, since the
    # fixed end of the bracket is only needed to compute the new value of the
    # moving end; it is never returned.
    limit = xp.concat((xmin, xmax))

    factor = xp_ravel(xp.broadcast_to(factor, shape), xp=xp)
    factor = xp.astype(factor, dtype, copy=False)
    factor = xp.concat((factor, factor))

    active = xp.arange(2*n)
    args = [xp.concat((arg, arg)) for arg in args]

    # This is needed due to inner workings of `eim._loop`.
    # We're abusing it a tiny bit.
    shape = shape + (2,)

    # `d` is for "distance".
    # For searches without a limit, the distance between the fixed end of the
    # bracket `x0` and the moving end `x` will grow by `factor` each iteration.
    # For searches with a limit, the distance between the `limit` and the
    # moving end of the bracket `x` will shrink by `factor` each iteration.
    i = xp.isinf(limit)
    ni = ~i
    d = xp.zeros_like(x)
    d[i] = x[i] - x0[i]
    d[ni] = limit[ni] - x[ni]

    status = xp.full_like(x, eim._EINPROGRESS, dtype=xp.int32)  # in progress
    status[invalid_bracket] = eim._EINPUTERR
    nit, nfev = 0, 1  # one function evaluation per side performed above

    work = _RichResult(x=x, x0=x0, f=f, limit=limit, factor=factor,
                       active=active, d=d, x_last=x_last, f_last=f_last,
                       nit=nit, nfev=nfev, status=status, args=args,
                       xl=xp.nan, xr=xp.nan, fl=xp.nan, fr=xp.nan, n=n)
    res_work_pairs = [('status', 'status'), ('xl', 'xl'), ('xr', 'xr'),
                      ('nit', 'nit'), ('nfev', 'nfev'), ('fl', 'fl'),
                      ('fr', 'fr'), ('x', 'x'), ('f', 'f'),
                      ('x_last', 'x_last'), ('f_last', 'f_last')]

    def pre_func_eval(work):
        # Initialize moving end of bracket
        x = xp.zeros_like(work.x)

        # Unlimited brackets grow by increasing the distance from the fixed
        # end to the moving end by `factor`.
        i = xp.isinf(work.limit)  # indices of unlimited brackets
        work.d[i] *= work.factor[i]
        x[i] = work.x0[i] + work.d[i]

        # Limited brackets grow by decreasing the distance from the limit to
        # the moving end.
        ni = ~i  # indices of limited brackets
        work.d[ni] /= work.factor[ni]
        x[ni] = work.limit[ni] - work.d[ni]
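
        # A worked illustration of the two growth rules (hypothetical
        # numbers): an unlimited search with x0 = 1, x = 2, factor = 2 gives
        # d = 1, 2, 4, ... and hence x = 3, 5, 9, ...; a limited search with
        # limit = 10, x = 2 gives d = 8, 4, 2, ... and hence x = 6, 8, 9, ...,
        # approaching (and eventually reaching) the limit.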

        return x

    def post_func_eval(x, f, work):
        # Keep track of the previous location of the moving end so that we can
        # return a narrower bracket. (The alternative is to remember the
        # original fixed end, but then the bracket would be wider than needed.)
        work.x_last = work.x
        work.f_last = work.f
        work.x = x
        work.f = f

    def check_termination(work):
        # Condition 0: initial bracket is invalid
        stop = (work.status == eim._EINPUTERR)

        # Condition 1: a valid bracket (or the root itself) has been found
        sf = xp.sign(work.f)
        sf_last = xp.sign(work.f_last)
        i = ((sf_last == -sf) | (sf_last == 0) | (sf == 0)) & ~stop
        work.status[i] = eim._ECONVERGED
        stop[i] = True

        # Condition 2: the other side's search found a valid bracket.
        # (If we just found a bracket with the rightward search, we can stop
        # the leftward search, and vice-versa.)
        # To do this, we need to set the status of the other side's search;
        # this is tricky because `work.status` contains only the *active*
        # elements, so we don't immediately know the index of the element we
        # need to set - or even if it's still there. (That search may have
        # terminated already, e.g. by reaching its `limit`.)
        # To facilitate this, `work.active` contains a unique integer index of
        # each search. Index `k` (`k < n`) and `k + n` correspond with a
        # leftward and rightward search, respectively. Elements are removed
        # from `work.active` just as they are removed from `work.status`, so
        # we use `work.active` to help find the right location in
        # `work.status`.
        # Get the integer indices of the elements that can also stop
        also_stop = (work.active[i] + work.n) % (2*work.n)
        # Check whether they are still active. We want to find the indices
        # in `work.active` where the associated values in `work.active` are
        # contained in `also_stop`. `xp.searchsorted` lets us take advantage
        # of `work.active` being sorted, but requires some hackery because
        # `searchsorted` solves the separate but related problem of finding
        # the indices where the values in `also_stop` should be added to
        # maintain sorted order.
        j = xp.searchsorted(work.active, also_stop)
        # If the location exceeds the length of `work.active`, they are
        # not there. This happens when a value in `also_stop` is larger than
        # the greatest value in `work.active`. This case needs special
        # handling because we cannot simply check that
        # `also_stop == work.active[j]`.
        mask = j < work.active.shape[0]
        # Note that we also have to use the mask to filter `also_stop` to
        # ensure that `also_stop` and `j` will still have the same shape.
        j, also_stop = j[mask], also_stop[mask]
        j = j[also_stop == work.active[j]]
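        # A small worked example (hypothetical state): with n = 2 and
        # work.active = [0, 3] (searches 1 and 2 already terminated), suppose
        # leftward search 0 just found a bracket. Then also_stop = [2],
        # searchsorted([0, 3], [2]) gives j = [1], and since
        # work.active[1] == 3 != 2, search 2 is filtered out: it has already
        # terminated, so there is nothing left to stop.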
        # Now convert these to boolean indices to use with `work.status`.
        i = xp.zeros_like(stop)
        i[j] = True  # boolean indices of elements that can also stop
        i = i & ~stop
        work.status[i] = _ESTOPONESIDE
        stop[i] = True

        # Condition 3: moving end of bracket reaches limit
        i = (work.x == work.limit) & ~stop
        work.status[i] = _ELIMITS
        stop[i] = True

        # Condition 4: non-finite value encountered
        i = ~(xp.isfinite(work.x) & xp.isfinite(work.f)) & ~stop
        work.status[i] = eim._EVALUEERR
        stop[i] = True

        return stop

    def post_termination_check(work):
        pass

    def customize_result(res, shape):
        n = res['x'].shape[0] // 2

        # To avoid ambiguity, below we refer to `xl0`, the initial left
        # endpoint, as `a`, and `xr0`, the initial right endpoint, as `b`.
        # Because we treat the two one-sided searches as though they were
        # independent, what we keep track of in `work` and what we want to
        # return in `res` look quite different. Combine the results from the
        # two one-sided searches before reporting the results to the user.
        # - "a" refers to the leftward search (the moving end started at `a`)
        # - "b" refers to the rightward search (the moving end started at `b`)
        # - "l" refers to the left end of the bracket (closer to -oo)
        # - "r" refers to the right end of the bracket (closer to +oo)
        xal = res['x'][:n]
        xar = res['x_last'][:n]
        xbl = res['x_last'][n:]
        xbr = res['x'][n:]

        fal = res['f'][:n]
        far = res['f_last'][:n]
        fbl = res['f_last'][n:]
        fbr = res['f'][n:]

        # Initialize the brackets and corresponding function values to return
        # to the user. Brackets may not be valid (e.g. there is no root,
        # there weren't enough iterations, NaN encountered), but we still need
        # to return something. One option would be all NaNs, but what I've
        # chosen here is the left- and right-most points at which the function
        # has been evaluated. This gives the user some information about what
        # interval of the real line has been searched and shows that there is
        # no sign change between the two ends.
        xl = xp.asarray(xal, copy=True)
        fl = xp.asarray(fal, copy=True)
        xr = xp.asarray(xbr, copy=True)
        fr = xp.asarray(fbr, copy=True)

        # `status` indicates whether the bracket is valid or not. If so,
        # we want to adjust the bracket we return to be the narrowest possible
        # given the points at which we evaluated the function.
        # For example, if bracket "a" is valid and smaller than bracket "b",
        # OR if bracket "a" is valid and bracket "b" is not valid, we want to
        # return bracket "a" (and vice versa).
        sa = res['status'][:n]
        sb = res['status'][n:]

        da = xar - xal
        db = xbr - xbl

        i1 = ((da <= db) & (sa == 0)) | ((sa == 0) & (sb != 0))
        i2 = ((db <= da) & (sb == 0)) | ((sb == 0) & (sa != 0))
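        # For instance (hypothetical statuses): if both searches succeeded
        # (sa == sb == 0) and bracket "a" is narrower (da <= db), i1 is True,
        # so the right endpoint initialized above (xbr) is replaced with xar;
        # if only search "b" succeeded, i2 is True and the left endpoint (xal)
        # is replaced with xbl instead.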

        xr[i1] = xar[i1]
        fr[i1] = far[i1]
        xl[i2] = xbl[i2]
        fl[i2] = fbl[i2]

        # Finish assembling the result object
        res['xl'] = xl
        res['xr'] = xr
        res['fl'] = fl
        res['fr'] = fr

        res['nit'] = xp.maximum(res['nit'][:n], res['nit'][n:])
        res['nfev'] = res['nfev'][:n] + res['nfev'][n:]
        # If the status on one side is zero, the status is zero. In any case,
        # report the status from one side only.
        res['status'] = xp.where(sa == 0, sa, sb)
        res['success'] = (res['status'] == 0)

        del res['x']
        del res['f']
        del res['x_last']
        del res['f_last']

        return shape[:-1]

    return eim._loop(work, callback, shape, maxiter, func, args, dtype,
                     pre_func_eval, post_func_eval, check_termination,
                     post_termination_check, customize_result, res_work_pairs,
                     xp)


def _bracket_minimum_iv(func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter):

    if not callable(func):
        raise ValueError('`func` must be callable.')

    if not np.iterable(args):
        args = (args,)

    xp = array_namespace(xm0, xl0, xr0, xmin, xmax, factor, *args)

    xmin = -xp.inf if xmin is None else xmin
    xmax = xp.inf if xmax is None else xmax

    # If xl0 (xr0) is not supplied, fill with a dummy value for the sake
    # of broadcasting. We need to wait until xmin (xmax) has been validated
    # to compute the default values.
    xl0_not_supplied = False
    if xl0 is None:
        xl0 = xp.nan
        xl0_not_supplied = True

    xr0_not_supplied = False
    if xr0 is None:
        xr0 = xp.nan
        xr0_not_supplied = True

    factor = 2.0 if factor is None else factor

    xm0, xl0, xr0, xmin, xmax, factor = xp_promote(
        xm0, xl0, xr0, xmin, xmax, factor, broadcast=True, force_floating=True, xp=xp)

    if not xp.isdtype(xm0.dtype, ('integral', 'real floating')):
        raise ValueError('`xm0` must be numeric and real.')

    if (not xp.isdtype(xl0.dtype, "numeric")
            or xp.isdtype(xl0.dtype, "complex floating")):
        raise ValueError('`xl0` must be numeric and real.')

    if (not xp.isdtype(xr0.dtype, "numeric")
            or xp.isdtype(xr0.dtype, "complex floating")):
        raise ValueError('`xr0` must be numeric and real.')

    if (not xp.isdtype(xmin.dtype, "numeric")
            or xp.isdtype(xmin.dtype, "complex floating")):
        raise ValueError('`xmin` must be numeric and real.')

    if (not xp.isdtype(xmax.dtype, "numeric")
            or xp.isdtype(xmax.dtype, "complex floating")):
        raise ValueError('`xmax` must be numeric and real.')

    if (not xp.isdtype(factor.dtype, "numeric")
            or xp.isdtype(factor.dtype, "complex floating")):
        raise ValueError('`factor` must be numeric and real.')
    if not xp.all(factor > 1):
        raise ValueError('All elements of `factor` must be greater than 1.')

    # Calculate default values of xl0 and/or xr0 if they have not been supplied
    # by the user. We need to be careful to ensure xl0 and xr0 are not outside
    # of (xmin, xmax).
    if xl0_not_supplied:
        xl0 = xm0 - xp.minimum((xm0 - xmin)/16, 0.5)
        xl0 = xp.astype(xl0, xm0.dtype, copy=False)
    if xr0_not_supplied:
        xr0 = xm0 + xp.minimum((xmax - xm0)/16, 0.5)
        xr0 = xp.astype(xr0, xm0.dtype, copy=False)

    maxiter = xp.asarray(maxiter)
    message = '`maxiter` must be a non-negative integer.'
    if (not xp.isdtype(maxiter.dtype, "numeric") or maxiter.shape != tuple()
            or xp.isdtype(maxiter.dtype, "complex floating")):
        raise ValueError(message)
    maxiter_int = int(maxiter[()])
    if not maxiter == maxiter_int or maxiter < 0:
        raise ValueError(message)

    return func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter, xp


def _bracket_minimum(func, xm0, *, xl0=None, xr0=None, xmin=None, xmax=None,
                     factor=None, args=(), maxiter=1000):
    """Bracket the minimum of a unimodal scalar function of one variable

    This function works elementwise when `xm0`, `xl0`, `xr0`, `xmin`, `xmax`,
    and the elements of `args` are broadcastable arrays.

    Parameters
    ----------
    func : callable
        The function for which the minimum is to be bracketed.
        The signature must be::

            func(x: ndarray, *args) -> ndarray

        where each element of ``x`` is a finite real and ``args`` is a tuple,
        which may contain an arbitrary number of arrays that are broadcastable
        with ``x``. `func` must be an elementwise function: each element
        ``func(x)[i]`` must equal ``func(x[i])`` for all indices ``i``.
    xm0 : float array_like
        Starting guess for the middle point of the bracket.
    xl0, xr0 : float array_like, optional
        Starting guesses for the left and right endpoints of the bracket. Must
        be broadcastable with one another and with `xm0`.
    xmin, xmax : float array_like, optional
        Minimum and maximum allowable endpoints of the bracket, inclusive. Must
        be broadcastable with `xl0`, `xm0`, and `xr0`.
    factor : float array_like, optional
        Controls expansion of the bracket endpoint in the downhill direction.
        Works differently in the cases where a limit is set in the downhill
        direction with `xmax` or `xmin`. See Notes.
    args : tuple, optional
        Additional positional arguments to be passed to `func`. Must be arrays
        broadcastable with `xl0`, `xm0`, `xr0`, `xmin`, and `xmax`. If the
        callable to be bracketed requires arguments that are not broadcastable
        with these arrays, wrap that callable with `func` such that `func`
        accepts only ``x`` and broadcastable arrays.
    maxiter : int, optional
        The maximum number of iterations of the algorithm to perform. The
        number of function evaluations is three greater than the number of
        iterations.

    Returns
    -------
    res : _RichResult
        An instance of `scipy._lib._util._RichResult` with the following
        attributes. The descriptions are written as though the values will be
        scalars; however, if `func` returns an array, the outputs will be
        arrays of the same shape.

        xl, xm, xr : float
            The left, middle, and right points of the bracket, if the
            algorithm terminated successfully.
        fl, fm, fr : float
            The function value at the left, middle, and right points of the
            bracket.
        nfev : int
            The number of function evaluations required to find the bracket.
        nit : int
            The number of iterations of the algorithm that were performed.
        status : int
            An integer representing the exit status of the algorithm.

            - ``0`` : The algorithm produced a valid bracket.
            - ``-1`` : The bracket expanded to the allowable limits. Assuming
              unimodality, this implies the endpoint at the limit is a
              minimizer.
            - ``-2`` : The maximum number of iterations was reached.
            - ``-3`` : A non-finite value was encountered.
            - ``-4`` : Iteration was terminated by ``callback``. (Unused here:
              ``callback`` is currently disabled.)
            - ``-5`` : The initial bracket does not satisfy
              ``xmin <= xl0 < xm0 < xr0 <= xmax``.

        success : bool
            ``True`` when the algorithm terminated successfully (status ``0``).

    Notes
    -----
    Similar to `scipy.optimize.bracket`, this function seeks to find real
    points ``xl < xm < xr`` such that ``f(xl) >= f(xm)`` and ``f(xr) >= f(xm)``,
    where at least one of the inequalities is strict. Unlike `scipy.optimize.bracket`,
    this function can operate in a vectorized manner on array input, so long as
    the input arrays are broadcastable with each other. Also unlike
    `scipy.optimize.bracket`, users may specify minimum and maximum endpoints
    for the desired bracket.

    Given an initial trio of points ``xl = xl0``, ``xm = xm0``, ``xr = xr0``,
    the algorithm checks if these points already give a valid bracket. If not,
    a new endpoint ``w`` is chosen in the "downhill" direction, ``xm`` becomes
    the new opposite endpoint, and either ``xl`` or ``xr`` becomes the new
    middle point, depending on which direction is downhill. The algorithm
    repeats from here.

    The new endpoint ``w`` is chosen differently depending on whether or not a
    boundary `xmin` or `xmax` has been set in the downhill direction. Without
    loss of generality, suppose the downhill direction is to the right, so that
    ``f(xl) > f(xm) > f(xr)``. If there is no boundary to the right, then ``w``
    is chosen to be ``xr + factor * (xr - xm)``, where `factor` is controlled
    by the user (defaults to 2.0) so that step sizes increase in geometric
    proportion. If there is a boundary, `xmax` in this case, then ``w`` is
    chosen to be ``xmax - (xmax - xr)/factor``, with steps slowing to a stop
    at `xmax`. This cautious approach ensures that a minimum near but distinct
    from the boundary isn't missed, while also detecting whether `xmax` is
    itself a minimizer when `xmax` is reached after a finite number of steps.
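
    Examples
    --------
    A minimal usage sketch (hypothetical values; this private function is
    subject to change):

    >>> import numpy as np
    >>> from scipy.optimize._bracket import _bracket_minimum
    >>> def f(x, c):
    ...     return (x - c)**2
    >>> res = _bracket_minimum(f, np.asarray(0.), args=(np.asarray(1.),))
    >>> bool(res.success), bool(res.xl < res.xm < res.xr)
    (True, True)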
    """  # noqa: E501
    callback = None  # works; I just don't want to test it

    temp = _bracket_minimum_iv(func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter)
    func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter, xp = temp

    xs = (xl0, xm0, xr0)
    temp = eim._initialize(func, xs, args)
    func, xs, fs, args, shape, dtype, xp = temp

    xl0, xm0, xr0 = xs
    fl0, fm0, fr0 = fs
    xmin = xp.astype(xp.broadcast_to(xmin, shape), dtype, copy=False)
    xmin = xp_ravel(xmin, xp=xp)
    xmax = xp.astype(xp.broadcast_to(xmax, shape), dtype, copy=False)
    xmax = xp_ravel(xmax, xp=xp)
    invalid_bracket = ~((xmin <= xl0) & (xl0 < xm0) & (xm0 < xr0) & (xr0 <= xmax))
    # We will modify `factor` later on, so make a copy. `np.broadcast_to`
    # returns a read-only view.
    factor = xp.astype(xp.broadcast_to(factor, shape), dtype, copy=True)
    factor = xp_ravel(factor)

    # To simplify the logic, swap xl and xr if f(xl) < f(xr). We should always
    # be marching downhill in the direction from xl to xr.
    comp = fl0 < fr0
    xl0[comp], xr0[comp] = xr0[comp], xl0[comp]
    fl0[comp], fr0[comp] = fr0[comp], fl0[comp]
    # We only need the boundary in the direction we're traveling.
    limit = xp.where(comp, xmin, xmax)

    unlimited = xp.isinf(limit)
    limited = ~unlimited
    step = xp.empty_like(xl0)

    step[unlimited] = (xr0[unlimited] - xm0[unlimited])
    step[limited] = (limit[limited] - xr0[limited])

    # The step size is divided by `factor` in the case where there is a limit.
    factor[limited] = 1 / factor[limited]
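    # For example (hypothetical numbers): with limit = 10 and xr0 = 8, the
    # initial step is 2; `pre_func_eval` then halves it each iteration (for
    # the default factor of 2), placing the new endpoint at 9, 9.5, 9.75, ...,
    # slowing to a stop at the limit.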

    status = xp.full_like(xl0, eim._EINPROGRESS, dtype=xp.int32)
    status[invalid_bracket] = eim._EINPUTERR
    nit, nfev = 0, 3

    work = _RichResult(xl=xl0, xm=xm0, xr=xr0, xr0=xr0, fl=fl0, fm=fm0, fr=fr0,
                       step=step, limit=limit, limited=limited, factor=factor,
                       nit=nit, nfev=nfev, status=status, args=args)

    res_work_pairs = [('status', 'status'), ('xl', 'xl'), ('xm', 'xm'), ('xr', 'xr'),
                      ('nit', 'nit'), ('nfev', 'nfev'), ('fl', 'fl'), ('fm', 'fm'),
                      ('fr', 'fr')]

    def pre_func_eval(work):
        work.step *= work.factor
        x = xp.empty_like(work.xr)
        x[~work.limited] = work.xr0[~work.limited] + work.step[~work.limited]
        x[work.limited] = work.limit[work.limited] - work.step[work.limited]
        # Since the new bracket endpoint is calculated from an offset with the
        # limit, it may be the case that the new endpoint equals the old
        # endpoint when the old endpoint is sufficiently close to the limit.
        # We use the limit itself as the new endpoint in these cases.
        x[work.limited] = xp.where(
            x[work.limited] == work.xr[work.limited],
            work.limit[work.limited],
            x[work.limited],
        )
        return x

    def post_func_eval(x, f, work):
        work.xl, work.xm, work.xr = work.xm, work.xr, x
        work.fl, work.fm, work.fr = work.fm, work.fr, f

    def check_termination(work):
        # Condition 0: Initial bracket is invalid.
        stop = (work.status == eim._EINPUTERR)

        # Condition 1: A valid bracket has been found.
        i = (
            (work.fl >= work.fm) & (work.fr > work.fm)
            | (work.fl > work.fm) & (work.fr >= work.fm)
        ) & ~stop
        work.status[i] = eim._ECONVERGED
        stop[i] = True

        # Condition 2: Moving end of bracket reaches limit.
        i = (work.xr == work.limit) & ~stop
        work.status[i] = _ELIMITS
        stop[i] = True

        # Condition 3: non-finite value encountered
        i = ~(xp.isfinite(work.xr) & xp.isfinite(work.fr)) & ~stop
        work.status[i] = eim._EVALUEERR
        stop[i] = True

        return stop

    def post_termination_check(work):
        pass

    def customize_result(res, shape):
        # Reorder entries of xl and xr if they were swapped due to
        # f(xl0) < f(xr0).
        comp = res['xl'] > res['xr']
        res['xl'][comp], res['xr'][comp] = res['xr'][comp], res['xl'][comp]
        res['fl'][comp], res['fr'][comp] = res['fr'][comp], res['fl'][comp]
        return shape

    return eim._loop(work, callback, shape,
                     maxiter, func, args, dtype,
                     pre_func_eval, post_func_eval,
                     check_termination, post_termination_check,
                     customize_result, res_work_pairs, xp)

@ -0,0 +1,551 @@
import math
import numpy as np
import scipy._lib._elementwise_iterative_method as eim
from scipy._lib._util import _RichResult
from scipy._lib._array_api import xp_copy

# TODO:
# - (maybe?) don't use fancy indexing assignment
# - figure out how to replace the new `try`/`except`s


def _chandrupatla(func, a, b, *, args=(), xatol=None, xrtol=None,
                  fatol=None, frtol=0, maxiter=None, callback=None):
    """Find the root of an elementwise function using Chandrupatla's algorithm.

    For each element of the output of `func`, `_chandrupatla` seeks the scalar
    root that makes the element 0. This function allows for `a`, `b`, and the
    output of `func` to be of any broadcastable shapes.

    Parameters
    ----------
    func : callable
        The function whose root is desired. The signature must be::

            func(x: ndarray, *args) -> ndarray

        where each element of ``x`` is a finite real and ``args`` is a tuple,
        which may contain an arbitrary number of components of any type(s).
        ``func`` must be an elementwise function: each element ``func(x)[i]``
        must equal ``func(x[i])`` for all indices ``i``. `_chandrupatla`
        seeks an array ``x`` such that ``func(x)`` is an array of zeros.
    a, b : array_like
        The lower and upper bounds of the root of the function. Must be
        broadcastable with one another.
    args : tuple, optional
        Additional positional arguments to be passed to `func`.
    xatol, xrtol, fatol, frtol : float, optional
        Absolute and relative tolerances on the root and function value.
        See Notes for details.
    maxiter : int, optional
        The maximum number of iterations of the algorithm to perform.
        The default is the maximum possible number of bisections within
        the (normal) floating point numbers of the relevant dtype.
    callback : callable, optional
        An optional user-supplied function to be called before the first
        iteration and after each iteration.
        Called as ``callback(res)``, where ``res`` is a ``_RichResult``
        similar to that returned by `_chandrupatla` (but containing the
        current iterate's values of all variables). If `callback` raises a
        ``StopIteration``, the algorithm will terminate immediately and
        `_chandrupatla` will return a result.

    Returns
    -------
    res : _RichResult
        An instance of `scipy._lib._util._RichResult` with the following
        attributes. The descriptions are written as though the values will be
        scalars; however, if `func` returns an array, the outputs will be
        arrays of the same shape.

        x : float
            The root of the function, if the algorithm terminated
            successfully.
        nfev : int
            The number of times the function was called to find the root.
        nit : int
            The number of iterations of Chandrupatla's algorithm performed.
        status : int
            An integer representing the exit status of the algorithm.
            ``0`` : The algorithm converged to the specified tolerances.
            ``-1`` : The algorithm encountered an invalid bracket.
            ``-2`` : The maximum number of iterations was reached.
            ``-3`` : A non-finite value was encountered.
            ``-4`` : Iteration was terminated by `callback`.
            ``1`` : The algorithm is proceeding normally (in `callback` only).
        success : bool
            ``True`` when the algorithm terminated successfully (status ``0``).
        fun : float
            The value of `func` evaluated at `x`.
        xl, xr : float
            The lower and upper ends of the bracket.
        fl, fr : float
            The function value at the lower and upper ends of the bracket.

    Notes
    -----
    Implemented based on Chandrupatla's original paper [1]_.

    If ``xl`` and ``xr`` are the left and right ends of the bracket,
    ``xmin = xl if abs(func(xl)) <= abs(func(xr)) else xr``,
    and ``fmin0 = min(abs(func(a)), abs(func(b)))``, then the algorithm is
    considered to have converged when ``abs(xr - xl) < xatol + abs(xmin) * xrtol``
    or ``abs(func(xmin)) <= fatol + fmin0 * frtol``. This is equivalent to the
    termination condition described in [1]_ with ``xrtol = 4e-10``,
    ``xatol = 1e-5``, and ``fatol = frtol = 0``. The default values are
    ``xatol = 4*tiny``, ``xrtol = 4*eps``, ``frtol = 0``, and ``fatol = tiny``,
    where ``eps`` and ``tiny`` are the precision and smallest normal number
    of the result ``dtype`` of function inputs and outputs.

    References
    ----------
    .. [1] Chandrupatla, Tirupathi R. (1997).
        "A new hybrid quadratic/bisection algorithm for finding the zero of a
        nonlinear function without using derivatives".
        Advances in Engineering Software, 28(3), 145-149.
        https://doi.org/10.1016/s0965-9978(96)00051-8

    See Also
    --------
    brentq, brenth, ridder, bisect, newton

    Examples
    --------
    >>> from scipy import optimize
    >>> def f(x, c):
    ...     return x**3 - 2*x - c
    >>> c = 5
    >>> res = optimize._chandrupatla._chandrupatla(f, 0, 3, args=(c,))
    >>> res.x
    2.0945514818937463

    >>> c = [3, 4, 5]
    >>> res = optimize._chandrupatla._chandrupatla(f, 0, 3, args=(c,))
    >>> res.x
    array([1.8932892 , 2.        , 2.09455148])

    """
    res = _chandrupatla_iv(func, args, xatol, xrtol,
                           fatol, frtol, maxiter, callback)
    func, args, xatol, xrtol, fatol, frtol, maxiter, callback = res

    # Initialization
    temp = eim._initialize(func, (a, b), args)
    func, xs, fs, args, shape, dtype, xp = temp
    x1, x2 = xs
    f1, f2 = fs
    status = xp.full_like(x1, eim._EINPROGRESS,
                          dtype=xp.int32)  # in progress
    nit, nfev = 0, 2  # two function evaluations performed above
    finfo = xp.finfo(dtype)
    xatol = 4*finfo.smallest_normal if xatol is None else xatol
    xrtol = 4*finfo.eps if xrtol is None else xrtol
    fatol = finfo.smallest_normal if fatol is None else fatol
    frtol = frtol * xp.minimum(xp.abs(f1), xp.abs(f2))
    maxiter = (math.log2(finfo.max) - math.log2(finfo.smallest_normal)
               if maxiter is None else maxiter)
    work = _RichResult(x1=x1, f1=f1, x2=x2, f2=f2, x3=None, f3=None, t=0.5,
                       xatol=xatol, xrtol=xrtol, fatol=fatol, frtol=frtol,
                       nit=nit, nfev=nfev, status=status)
    res_work_pairs = [('status', 'status'), ('x', 'xmin'), ('fun', 'fmin'),
                      ('nit', 'nit'), ('nfev', 'nfev'), ('xl', 'x1'),
                      ('fl', 'f1'), ('xr', 'x2'), ('fr', 'f2')]

    def pre_func_eval(work):
        # [1] Figure 1 (first box)
        x = work.x1 + work.t * (work.x2 - work.x1)
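        # `t` is the fractional position of the next evaluation point within
        # the current bracket: it starts at 0.5 (so the first step is a
        # bisection), and `post_termination_check` later replaces it with the
        # inverse quadratic interpolation estimate when that estimate is
        # deemed trustworthy.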
        return x

    def post_func_eval(x, f, work):
        # [1] Figure 1 (first diamond and boxes)
        # Note: y/n are reversed in figure; compare to BASIC in appendix
        work.x3, work.f3 = (xp.asarray(work.x2, copy=True),
                            xp.asarray(work.f2, copy=True))
        j = xp.sign(f) == xp.sign(work.f1)
        nj = ~j
        work.x3[j], work.f3[j] = work.x1[j], work.f1[j]
        work.x2[nj], work.f2[nj] = work.x1[nj], work.f1[nj]
        work.x1, work.f1 = x, f

    def check_termination(work):
        # [1] Figure 1 (second diamond)
        # Check for all terminal conditions and record statuses.

        # See [1] Section 4 (first two sentences)
        i = xp.abs(work.f1) < xp.abs(work.f2)
        work.xmin = xp.where(i, work.x1, work.x2)
        work.fmin = xp.where(i, work.f1, work.f2)
        stop = xp.zeros_like(work.x1, dtype=xp.bool)  # termination condition met

        # If the function value tolerance is met, report successful
        # convergence, regardless of other conditions. Note that `frtol` has
        # been redefined as `frtol = frtol * minimum(abs(f1), abs(f2))`, where
        # `f1` and `f2` are the function evaluated at the original ends of the
        # bracket.
        i = xp.abs(work.fmin) <= work.fatol + work.frtol
        work.status[i] = eim._ECONVERGED
        stop[i] = True

        # If the bracket is no longer valid, report failure (unless a function
        # tolerance is met, as detected above).
        i = (xp.sign(work.f1) == xp.sign(work.f2)) & ~stop
        NaN = xp.asarray(xp.nan, dtype=work.xmin.dtype)
        work.xmin[i], work.fmin[i], work.status[i] = NaN, NaN, eim._ESIGNERR
        stop[i] = True

        # If the abscissae are non-finite or either function value is NaN,
        # report failure.
        x_nonfinite = ~(xp.isfinite(work.x1) & xp.isfinite(work.x2))
        f_nan = xp.isnan(work.f1) | xp.isnan(work.f2)
        i = (x_nonfinite | f_nan) & ~stop
        work.xmin[i], work.fmin[i], work.status[i] = NaN, NaN, eim._EVALUEERR
        stop[i] = True

        # This is the convergence criterion used in bisect. Chandrupatla's
        # criterion is equivalent to this except with a factor of 4 on `xrtol`.
        work.dx = xp.abs(work.x2 - work.x1)
        work.tol = xp.abs(work.xmin) * work.xrtol + work.xatol
        i = work.dx < work.tol
        work.status[i] = eim._ECONVERGED
        stop[i] = True

        return stop

    def post_termination_check(work):
        # [1] Figure 1 (third diamond and boxes / Equation 1)
        xi1 = (work.x1 - work.x2) / (work.x3 - work.x2)
        with np.errstate(divide='ignore', invalid='ignore'):
            phi1 = (work.f1 - work.f2) / (work.f3 - work.f2)
            alpha = (work.x3 - work.x1) / (work.x2 - work.x1)
            j = ((1 - xp.sqrt(1 - xi1)) < phi1) & (phi1 < xp.sqrt(xi1))

        f1j, f2j, f3j, alphaj = work.f1[j], work.f2[j], work.f3[j], alpha[j]
        t = xp.full_like(alpha, 0.5)
        t[j] = (f1j / (f1j - f2j) * f3j / (f3j - f2j)
                - alphaj * f1j / (f3j - f1j) * f2j / (f2j - f3j))

        # [1] Figure 1 (last box; see also BASIC in appendix with comment
        # "Adjust T Away from the Interval Boundary")
        tl = 0.5 * work.tol / work.dx
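        # Clipping below keeps the next evaluation point at least `tol/2`
        # away from either end of the bracket; e.g. (hypothetical numbers)
        # with tol/dx = 0.01, `t` is confined to [0.005, 0.995], so the
        # iterate cannot stagnate at an endpoint.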
        work.t = xp.clip(t, tl, 1 - tl)

    def customize_result(res, shape):
        xl, xr, fl, fr = res['xl'], res['xr'], res['fl'], res['fr']
        i = res['xl'] < res['xr']
        res['xl'] = xp.where(i, xl, xr)
        res['xr'] = xp.where(i, xr, xl)
        res['fl'] = xp.where(i, fl, fr)
        res['fr'] = xp.where(i, fr, fl)
        return shape

    return eim._loop(work, callback, shape, maxiter, func, args, dtype,
                     pre_func_eval, post_func_eval, check_termination,
                     post_termination_check, customize_result, res_work_pairs,
                     xp=xp)


def _chandrupatla_iv(func, args, xatol, xrtol,
                     fatol, frtol, maxiter, callback):
    # Input validation for `_chandrupatla`

    if not callable(func):
        raise ValueError('`func` must be callable.')

    if not np.iterable(args):
        args = (args,)

    # tolerances are floats, not arrays; OK to use NumPy
    tols = np.asarray([xatol if xatol is not None else 1,
                       xrtol if xrtol is not None else 1,
                       fatol if fatol is not None else 1,
                       frtol if frtol is not None else 1])
    if (not np.issubdtype(tols.dtype, np.number) or np.any(tols < 0)
            or np.any(np.isnan(tols)) or tols.shape != (4,)):
        raise ValueError('Tolerances must be non-negative scalars.')

    if maxiter is not None:
        maxiter_int = int(maxiter)
        if maxiter != maxiter_int or maxiter < 0:
            raise ValueError('`maxiter` must be a non-negative integer.')

    if callback is not None and not callable(callback):
        raise ValueError('`callback` must be callable.')

    return func, args, xatol, xrtol, fatol, frtol, maxiter, callback


def _chandrupatla_minimize(func, x1, x2, x3, *, args=(), xatol=None,
                           xrtol=None, fatol=None, frtol=None, maxiter=100,
                           callback=None):
    """Find the minimizer of an elementwise function.

    For each element of the output of `func`, `_chandrupatla_minimize` seeks
    the scalar minimizer that minimizes the element. This function allows for
    `x1`, `x2`, `x3`, and the elements of `args` to be arrays of any
    broadcastable shapes.

    Parameters
    ----------
    func : callable
        The function whose minimizer is desired. The signature must be::

            func(x: ndarray, *args) -> ndarray

        where each element of ``x`` is a finite real and ``args`` is a tuple,
        which may contain an arbitrary number of arrays that are broadcastable
        with `x`. ``func`` must be an elementwise function: each element
        ``func(x)[i]`` must equal ``func(x[i])`` for all indices ``i``.
        `_chandrupatla_minimize` seeks an array ``x`` such that ``func(x)``
        is an array of minima.
    x1, x2, x3 : array_like
        The abscissae of a standard scalar minimization bracket. A bracket is
        valid if ``x1 < x2 < x3`` and ``func(x1) > func(x2) <= func(x3)``.
        Must be broadcastable with one another and `args`.
    args : tuple, optional
        Additional positional arguments to be passed to `func`. Must be arrays
        broadcastable with `x1`, `x2`, and `x3`. If the callable to be
        minimized requires arguments that are not broadcastable with `x`,
        wrap that callable with `func` such that `func` accepts only `x` and
        broadcastable arrays.
    xatol, xrtol, fatol, frtol : float, optional
        Absolute and relative tolerances on the minimizer and function value.
        See Notes for details.
    maxiter : int, optional
        The maximum number of iterations of the algorithm to perform.
    callback : callable, optional
        An optional user-supplied function to be called before the first
        iteration and after each iteration.
        Called as ``callback(res)``, where ``res`` is a ``_RichResult``
        similar to that returned by `_chandrupatla_minimize` (but containing
        the current iterate's values of all variables). If `callback` raises a
        ``StopIteration``, the algorithm will terminate immediately and
        `_chandrupatla_minimize` will return a result.

    Returns
    -------
    res : _RichResult
        An instance of `scipy._lib._util._RichResult` with the following
        attributes. (The descriptions are written as though the values will be
        scalars; however, if `func` returns an array, the outputs will be
        arrays of the same shape.)

        success : bool
            ``True`` when the algorithm terminated successfully (status ``0``).
        status : int
            An integer representing the exit status of the algorithm.
            ``0`` : The algorithm converged to the specified tolerances.
            ``-1`` : The algorithm encountered an invalid bracket.
            ``-2`` : The maximum number of iterations was reached.
            ``-3`` : A non-finite value was encountered.
            ``-4`` : Iteration was terminated by `callback`.
            ``1`` : The algorithm is proceeding normally (in `callback` only).
        x : float
            The minimizer of the function, if the algorithm terminated
            successfully.
        fun : float
            The value of `func` evaluated at `x`.
        nfev : int
            The number of points at which `func` was evaluated.
        nit : int
            The number of iterations of the algorithm that were performed.
        xl, xm, xr : float
            The final three-point bracket.
        fl, fm, fr : float
            The function value at the bracket points.

    Notes
    -----
    Implemented based on Chandrupatla's original paper [1]_.

    If ``x1 < x2 < x3`` are the points of the bracket and ``f1 > f2 <= f3``
    are the values of ``func`` at those points, then the algorithm is
    considered to have converged when ``x3 - x1 <= abs(x2)*xrtol + xatol``
    or ``(f1 - 2*f2 + f3)/2 <= abs(f2)*frtol + fatol``. Note that the first
    of these differs from the termination conditions described in [1]_. The
    default value of `xrtol` is the square root of the precision of the
    appropriate dtype, and ``xatol = fatol = frtol`` is the smallest normal
    number of the appropriate dtype.

    References
    ----------
    .. [1] Chandrupatla, Tirupathi R. (1998).
        "An efficient quadratic fit-sectioning algorithm for minimization
        without derivatives".
        Computer Methods in Applied Mechanics and Engineering, 152(1-2),
        211-217. https://doi.org/10.1016/S0045-7825(97)00190-4

    See Also
    --------
    golden, brent, bounded

    Examples
    --------
    >>> from scipy.optimize._chandrupatla import _chandrupatla_minimize
    >>> def f(x, args=1):
    ...     return (x - args)**2
    >>> res = _chandrupatla_minimize(f, -5, 0, 5)
    >>> res.x
    1.0
    >>> c = [1, 1.5, 2]
    >>> res = _chandrupatla_minimize(f, -5, 0, 5, args=(c,))
    >>> res.x
    array([1. , 1.5, 2. ])
    """
    res = _chandrupatla_iv(func, args, xatol, xrtol,
                           fatol, frtol, maxiter, callback)
    func, args, xatol, xrtol, fatol, frtol, maxiter, callback = res

    # Initialization
    xs = (x1, x2, x3)
    temp = eim._initialize(func, xs, args)
    func, xs, fs, args, shape, dtype, xp = temp  # line split for PEP8
    x1, x2, x3 = xs
    f1, f2, f3 = fs
    phi = xp.asarray(0.5 + 0.5*5**0.5, dtype=dtype)[()]  # golden ratio
    status = xp.full_like(x1, eim._EINPROGRESS, dtype=xp.int32)  # in progress
    nit, nfev = 0, 3  # three function evaluations performed above
    fatol = xp.finfo(dtype).smallest_normal if fatol is None else fatol
    frtol = xp.finfo(dtype).smallest_normal if frtol is None else frtol
    xatol = xp.finfo(dtype).smallest_normal if xatol is None else xatol
    xrtol = math.sqrt(xp.finfo(dtype).eps) if xrtol is None else xrtol

    # Ensure that x1 < x2 < x3 initially.
    xs, fs = xp.stack((x1, x2, x3)), xp.stack((f1, f2, f3))
    i = xp.argsort(xs, axis=0)
    x1, x2, x3 = xp.take_along_axis(xs, i, axis=0)  # data-apis/array-api#808
    f1, f2, f3 = xp.take_along_axis(fs, i, axis=0)  # data-apis/array-api#808
    q0 = xp_copy(x3)  # "At the start, q0 is set at x3..." ([1] after (7))

    work = _RichResult(x1=x1, f1=f1, x2=x2, f2=f2, x3=x3, f3=f3, phi=phi,
                       xatol=xatol, xrtol=xrtol, fatol=fatol, frtol=frtol,
                       nit=nit, nfev=nfev, status=status, q0=q0, args=args)
    res_work_pairs = [('status', 'status'),
                      ('x', 'x2'), ('fun', 'f2'),
                      ('nit', 'nit'), ('nfev', 'nfev'),
                      ('xl', 'x1'), ('xm', 'x2'), ('xr', 'x3'),
                      ('fl', 'f1'), ('fm', 'f2'), ('fr', 'f3')]

    def pre_func_eval(work):
        # `check_termination` is called first -> `x3 - x2 > x2 - x1`
        # But let's calculate a few terms that we'll reuse
        x21 = work.x2 - work.x1
        x32 = work.x3 - work.x2

        # [1] Section 3. "The quadratic minimum point Q1 is calculated using
        # the relations developed in the previous section." [1] Section 2 (5/6)
        A = x21 * (work.f3 - work.f2)
        B = x32 * (work.f1 - work.f2)
        C = A / (A + B)
        # q1 = C * (work.x1 + work.x2) / 2 + (1 - C) * (work.x2 + work.x3) / 2
        q1 = 0.5 * (C*(work.x1 - work.x3) + work.x2 + work.x3)  # much faster
        # this is an array, so multiplying by 0.5 does not change dtype

        # "If Q1 and Q0 are sufficiently close... Q1 is accepted if it is
        # sufficiently away from the inside point x2"
        i = xp.abs(q1 - work.q0) < 0.5 * xp.abs(x21)  # [1] (7)
        xi = q1[i]
        # Later, after (9), "If the point Q1 is in a +/- xtol neighborhood of
        # x2, the new point is chosen in the larger interval at a distance
        # tol away from x2."
        # See also QBASIC code after "Accept Ql adjust if close to X2".
        j = xp.abs(q1[i] - work.x2[i]) <= work.xtol[i]
        xi[j] = work.x2[i][j] + xp.sign(x32[i][j]) * work.xtol[i][j]

        # "If condition (7) is not satisfied, golden sectioning of the larger
        # interval is carried out to introduce the new point."
        # (For simplicity, we go ahead and calculate it for all points, but we
        # change the elements for which the condition was satisfied.)
        x = work.x2 + (2 - work.phi) * x32
        x[i] = xi
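        # Note: since phi is the golden ratio, 2 - phi = 1 - 1/phi, which is
        # approximately 0.382, i.e. the new point divides the larger interval
        # in the classic golden-section proportion.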

        # "We define Q0 as the value of Q1 at the previous iteration."
        work.q0 = q1
        return x

    def post_func_eval(x, f, work):
        # Standard logic for updating a three-point bracket based on a new
        # point. In QBASIC code, see "IF SGN(X-X2) = SGN(X3-X2) THEN...".
        # There is an awful lot of data copying going on here; this would
        # probably benefit from code optimization or implementation in Pythran.
        i = xp.sign(x - work.x2) == xp.sign(work.x3 - work.x2)
        xi, x1i, x2i, x3i = x[i], work.x1[i], work.x2[i], work.x3[i]
        fi, f1i, f2i, f3i = f[i], work.f1[i], work.f2[i], work.f3[i]
        j = fi > f2i
        x3i[j], f3i[j] = xi[j], fi[j]
        j = ~j
        x1i[j], f1i[j], x2i[j], f2i[j] = x2i[j], f2i[j], xi[j], fi[j]

        ni = ~i
        xni, x1ni, x2ni, x3ni = x[ni], work.x1[ni], work.x2[ni], work.x3[ni]
        fni, f1ni, f2ni, f3ni = f[ni], work.f1[ni], work.f2[ni], work.f3[ni]
        j = fni > f2ni
        x1ni[j], f1ni[j] = xni[j], fni[j]
        j = ~j
        x3ni[j], f3ni[j], x2ni[j], f2ni[j] = x2ni[j], f2ni[j], xni[j], fni[j]

        work.x1[i], work.x2[i], work.x3[i] = x1i, x2i, x3i
        work.f1[i], work.f2[i], work.f3[i] = f1i, f2i, f3i
        work.x1[ni], work.x2[ni], work.x3[ni] = x1ni, x2ni, x3ni
        work.f1[ni], work.f2[ni], work.f3[ni] = f1ni, f2ni, f3ni

    def check_termination(work):
        # Check for all terminal conditions and record statuses.
        stop = xp.zeros_like(work.x1, dtype=bool)  # termination condition met

        # Bracket is invalid; stop and don't return minimizer/minimum
        i = ((work.f2 > work.f1) | (work.f2 > work.f3))
        work.x2[i], work.f2[i] = xp.nan, xp.nan
        stop[i], work.status[i] = True, eim._ESIGNERR

        # Non-finite values; stop and don't return minimizer/minimum
        finite = xp.isfinite(work.x1 + work.x2 + work.x3
                             + work.f1 + work.f2 + work.f3)
        i = ~(finite | stop)
        work.x2[i], work.f2[i] = xp.nan, xp.nan
        stop[i], work.status[i] = True, eim._EVALUEERR

        # [1] Section 3 "Points 1 and 3 are interchanged if necessary to make
        # the (x2, x3) the larger interval."
        # Note: I had used np.choose; this is much faster. This would be a good
        # place to save e.g. `work.x3 - work.x2` for reuse, but I tried and
        # didn't notice a speed boost, so let's keep it simple.
        i = xp.abs(work.x3 - work.x2) < xp.abs(work.x2 - work.x1)
        temp = work.x1[i]
        work.x1[i] = work.x3[i]
        work.x3[i] = temp
        temp = work.f1[i]
        work.f1[i] = work.f3[i]
        work.f3[i] = temp

        # [1] Section 3 (bottom of page 212)
        # "We set a tolerance value xtol..."
        work.xtol = xp.abs(work.x2) * work.xrtol + work.xatol  # [1] (8)
        # "The convergence based on interval is achieved when..."
        # Note: equality allowed in case of `xtol=0`
        i = xp.abs(work.x3 - work.x2) <= 2 * work.xtol  # [1] (9)

        # "We define ftol using..."
        ftol = xp.abs(work.f2) * work.frtol + work.fatol  # [1] (10)
        # "The convergence based on function values is achieved when..."
        # Note 1: modify in place to incorporate tolerance on function value.
        # Note 2: factor of 2 is not in the text; see QBASIC start of DO loop
        i |= (work.f1 - 2 * work.f2 + work.f3) <= 2*ftol  # [1] (11)
        i &= ~stop
        stop[i], work.status[i] = True, eim._ECONVERGED

        return stop

    def post_termination_check(work):
        pass

    def customize_result(res, shape):
        xl, xr, fl, fr = res['xl'], res['xr'], res['fl'], res['fr']
        i = res['xl'] >= res['xr']
        res['xl'] = xp.where(i, xr, xl)
        res['xr'] = xp.where(i, xl, xr)
        res['fl'] = xp.where(i, fr, fl)
        res['fr'] = xp.where(i, fl, fr)
        return shape

    return eim._loop(work, callback, shape, maxiter, func, args, dtype,
                     pre_func_eval, post_func_eval, check_termination,
                     post_termination_check, customize_result, res_work_pairs,
                     xp=xp)

297
venv/lib/python3.13/site-packages/scipy/optimize/_cobyla_py.py
Normal file

@ -0,0 +1,297 @@
"""
|
||||
Interface to Constrained Optimization By Linear Approximation
|
||||
|
||||
Functions
|
||||
---------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
fmin_cobyla
|
||||
|
||||
"""
|
||||
|
||||
from inspect import signature
|
||||
|
||||
import numpy as np
|
||||
from ._optimize import (OptimizeResult, _check_unknown_options,
|
||||
_prepare_scalar_function)
|
||||
from ._constraints import NonlinearConstraint
|
||||
|
||||
|
||||
__all__ = ['fmin_cobyla']
|
||||
|
||||
|
||||
def fmin_cobyla(func, x0, cons, args=(), consargs=None, rhobeg=1.0,
|
||||
rhoend=1e-4, maxfun=1000, disp=None, catol=2e-4,
|
||||
*, callback=None):
    """
    Minimize a function using the Constrained Optimization By Linear
    Approximation (COBYLA) method. This method uses the pure-Python
    implementation of the algorithm from PRIMA.

    Parameters
    ----------
    func : callable
        Function to minimize. In the form func(x, \\*args).
    x0 : ndarray
        Initial guess.
    cons : sequence
        Constraint functions; must all be ``>=0`` (a single function
        if only 1 constraint). Each function takes the parameters `x`
        as its first argument, and it can return either a single number or
        an array or list of numbers.
    args : tuple, optional
        Extra arguments to pass to function.
    consargs : tuple, optional
        Extra arguments to pass to constraint functions (default of None means
        use same extra arguments as those passed to func).
        Use ``()`` for no extra arguments.
    rhobeg : float, optional
        Reasonable initial changes to the variables.
    rhoend : float, optional
        Final accuracy in the optimization (not precisely guaranteed). This
        is a lower bound on the size of the trust region.
    disp : {0, 1, 2, 3}, optional
        Controls the frequency of output; 0 implies no output.
    maxfun : int, optional
        Maximum number of function evaluations.
    catol : float, optional
        Absolute tolerance for constraint violations.
    callback : callable, optional
        Called after each iteration, as ``callback(x)``, where ``x`` is the
        current parameter vector.

    Returns
    -------
    x : ndarray
        The argument that minimises `f`.

    See also
    --------
    minimize: Interface to minimization algorithms for multivariate
        functions. See the 'COBYLA' `method` in particular.

    Notes
    -----
    This algorithm is based on linear approximations to the objective
    function and each constraint. We briefly describe the algorithm.

    Suppose the function is being minimized over k variables. At the
    jth iteration the algorithm has k+1 points v_1, ..., v_(k+1),
    an approximate solution x_j, and a radius RHO_j. It constructs affine
    (i.e., linear plus a constant) approximations to the objective
    function and constraint functions such that their function values
    agree with the linear approximation on the k+1 points v_1,.., v_(k+1).
    This gives a linear program to solve (where the linear approximations
    of the constraint functions are constrained to be non-negative).

    However, the linear approximations are likely only good
    approximations near the current simplex, so the linear program is
    given the further requirement that the solution, which
    will become x_(j+1), must be within RHO_j from x_j. RHO_j only
    decreases, never increases. The initial RHO_j is rhobeg and the
    final RHO_j is rhoend. In this way COBYLA's iterations behave
    like a trust region algorithm.

    Additionally, the linear program may be inconsistent, or the
    approximation may give poor improvement. For details about
    how these issues are resolved, as well as how the points v_i are
    updated, refer to the source code or the references below.

    .. versionchanged:: 1.16.0
        The original Powell implementation was replaced by a pure
        Python version from the PRIMA package, with bug fixes and
        improvements being made.


    References
    ----------
    Powell M.J.D. (1994), "A direct search optimization method that models
    the objective and constraint functions by linear interpolation.", in
    Advances in Optimization and Numerical Analysis, eds. S. Gomez and
    J-P Hennart, Kluwer Academic (Dordrecht), pp. 51-67

    Powell M.J.D. (1998), "Direct search algorithms for optimization
    calculations", Acta Numerica 7, 287-336

    Powell M.J.D. (2007), "A view of algorithms for optimization without
    derivatives", Cambridge University Technical Report DAMTP 2007/NA03

    Zhang Z. (2023), "PRIMA: Reference Implementation for Powell's Methods with
    Modernization and Amelioration", https://www.libprima.net,
    :doi:`10.5281/zenodo.8052654`

    Examples
    --------
    Minimize the objective function f(x,y) = x*y subject
    to the constraints x**2 + y**2 < 1 and y > 0::

        >>> def objective(x):
        ...     return x[0]*x[1]
        ...
        >>> def constr1(x):
        ...     return 1 - (x[0]**2 + x[1]**2)
        ...
        >>> def constr2(x):
        ...     return x[1]
        ...
        >>> from scipy.optimize import fmin_cobyla
        >>> fmin_cobyla(objective, [0.0, 0.1], [constr1, constr2], rhoend=1e-7)
        array([-0.70710685,  0.70710671])

    The exact solution is (-sqrt(2)/2, sqrt(2)/2).

    """
    err = ("cons must be a sequence of callable functions or a single"
           " callable function.")
    try:
        len(cons)
    except TypeError as e:
        if callable(cons):
            cons = [cons]
        else:
            raise TypeError(err) from e
    else:
        for thisfunc in cons:
            if not callable(thisfunc):
                raise TypeError(err)

    if consargs is None:
        consargs = args

    # build constraints
    nlcs = []
    for con in cons:
        # Bind `con` via a default argument; otherwise the last `con` would be
        # captured by all of the `wrapped_con` closures.
        def wrapped_con(x, confunc=con):
            return confunc(x, *consargs)
        nlcs.append(NonlinearConstraint(wrapped_con, 0, np.inf))
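    # A hypothetical counter-example to illustrate the pitfall avoided above:
    # `[lambda x: con(x, *consargs) for con in cons]` would late-bind `con`,
    # so every wrapper would evaluate the *last* constraint; the `confunc=con`
    # default freezes the current `con` at definition time instead.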

    # options
    opts = {'rhobeg': rhobeg,
            'tol': rhoend,
            'disp': disp,
            'maxiter': maxfun,
            'catol': catol,
            'callback': callback}

    sol = _minimize_cobyla(func, x0, args, constraints=nlcs,
                           **opts)
    if disp and not sol['success']:
        print(f"COBYLA failed to find a solution: {sol.message}")
    return sol['x']


def _minimize_cobyla(fun, x0, args=(), constraints=(),
                     rhobeg=1.0, tol=1e-4, maxiter=1000,
                     disp=0, catol=None, f_target=-np.inf,
                     callback=None, bounds=None, **unknown_options):
    """
    Minimize a scalar function of one or more variables using the
    Constrained Optimization BY Linear Approximation (COBYLA) algorithm.
    This method uses the pure-Python implementation of the algorithm from
    PRIMA.

    Options
    -------
    rhobeg : float
        Reasonable initial changes to the variables.
    tol : float
        Final accuracy in the optimization (not precisely guaranteed).
        This is a lower bound on the size of the trust region.
    disp : int
        Controls the frequency of output:
        0. (default) There will be no printing.
        1. A message will be printed to the screen at the end of iteration,
           showing the best vector of variables found and its objective
           function value.
        2. In addition to 1, each new value of RHO is printed to the screen,
           with the best vector of variables so far and its objective
           function value.
        3. In addition to 2, each function evaluation with its variables will
           be printed to the screen.
    maxiter : int
        Maximum number of function evaluations.
    catol : float
        Tolerance (absolute) for constraint violations.
    f_target : float
        Stop if the objective function is less than `f_target`.

    .. versionchanged:: 1.16.0
        The original Powell implementation was replaced by a pure
        Python version from the PRIMA package, with bug fixes and
        improvements being made.


    References
    ----------
    Zhang Z. (2023), "PRIMA: Reference Implementation for Powell's Methods with
    Modernization and Amelioration", https://www.libprima.net,
    :doi:`10.5281/zenodo.8052654`
    """
    from .._lib.pyprima import minimize
    from .._lib.pyprima.common.infos import SMALL_TR_RADIUS, FTARGET_ACHIEVED
    from .._lib.pyprima.common.message import get_info_string
    _check_unknown_options(unknown_options)
    rhoend = tol
    iprint = disp if disp is not None else 0
    if iprint not in (0, 1, 2, 3):
        raise ValueError('disp argument to minimize must be 0, 1, 2, or 3; '
                         f'received {iprint}')
|
||||
# create the ScalarFunction, cobyla doesn't require derivative function
|
||||
def _jac(x, *args):
|
||||
return None
|
||||
|
||||
sf = _prepare_scalar_function(fun, x0, args=args, jac=_jac)
|
||||
|
||||
if callback is not None:
|
||||
sig = signature(callback)
|
||||
if set(sig.parameters) == {"intermediate_result"}:
|
||||
def wrapped_callback_intermediate(x, f, nf, tr, cstrv, nlconstrlist):
|
||||
intermediate_result = OptimizeResult(x=np.copy(x), fun=f, nfev=nf,
|
||||
nit=tr, maxcv=cstrv)
|
||||
callback(intermediate_result=intermediate_result)
|
||||
else:
|
||||
def wrapped_callback_intermediate(x, f, nf, tr, cstrv, nlconstrlist):
|
||||
callback(np.copy(x))
|
||||
def wrapped_callback(x, f, nf, tr, cstrv, nlconstrlist):
|
||||
try:
|
||||
wrapped_callback_intermediate(x, f, nf, tr, cstrv, nlconstrlist)
|
||||
return False
|
||||
except StopIteration:
|
||||
return True
|
||||
else:
|
||||
wrapped_callback = None
|
||||
|
||||
|
||||
ctol = catol if catol is not None else np.sqrt(np.finfo(float).eps)
|
||||
options = {
|
||||
'rhobeg': rhobeg,
|
||||
'rhoend': rhoend,
|
||||
'maxfev': maxiter,
|
||||
'iprint': iprint,
|
||||
'ctol': ctol,
|
||||
'ftarget': f_target,
|
||||
}
|
||||
|
||||
result = minimize(sf.fun, x0, method='cobyla', bounds=bounds,
|
||||
constraints=constraints, callback=wrapped_callback,
|
||||
options=options)
|
||||
|
||||
|
||||
if result.cstrv > ctol:
|
||||
success = False
|
||||
message = ('Did not converge to a solution satisfying the constraints. See '
|
||||
'`maxcv` for the magnitude of the violation.')
|
||||
else:
|
||||
success = result.info == SMALL_TR_RADIUS or result.info == FTARGET_ACHIEVED
|
||||
message = get_info_string('COBYLA', result.info)
|
||||
|
||||
return OptimizeResult(x=result.x,
|
||||
status=result.info,
|
||||
success=success,
|
||||
message=message,
|
||||
nfev=result.nf,
|
||||
fun=result.f,
|
||||
maxcv=result.cstrv)
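
# Illustrative sketch (not part of the original module): this wrapper is what
# `scipy.optimize.minimize` dispatches to for method='COBYLA'. The objective,
# starting point, and constraint below are made-up examples.
if __name__ == "__main__":
    from scipy.optimize import minimize

    res = minimize(
        lambda x: (x[0] - 1.0) ** 2 + (x[1] - 2.0) ** 2,  # smooth objective
        x0=[0.0, 0.0],
        method='COBYLA',
        # old-style inequality constraint dict: feasible when fun(x) >= 0
        constraints=[{'type': 'ineq', 'fun': lambda x: x[0] + x[1] - 1.0}],
        options={'rhobeg': 0.5, 'maxiter': 200},
    )
    print(res.x, res.fun, res.maxcv)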

@@ -0,0 +1,72 @@
import numpy as np
from threading import Lock

from ._optimize import _check_unknown_options


COBYQA_LOCK = Lock()


def _minimize_cobyqa(fun, x0, args=(), bounds=None, constraints=(),
                     callback=None, disp=False, maxfev=None, maxiter=None,
                     f_target=-np.inf, feasibility_tol=1e-8,
                     initial_tr_radius=1.0, final_tr_radius=1e-6, scale=False,
                     **unknown_options):
    """
    Minimize a scalar function of one or more variables using the
    Constrained Optimization BY Quadratic Approximations (COBYQA)
    algorithm [1]_.

    .. versionadded:: 1.14.0

    Options
    -------
    disp : bool
        Set to True to print information about the optimization procedure.
        Default is ``False``.
    maxfev : int
        Maximum number of function evaluations. Default is ``500 * n``, where
        ``n`` is the number of variables.
    maxiter : int
        Maximum number of iterations. Default is ``1000 * n``, where ``n`` is
        the number of variables.
    f_target : float
        Target value for the objective function. The optimization procedure is
        terminated when the objective function value of a feasible point (see
        `feasibility_tol` below) is less than or equal to this target. Default
        is ``-numpy.inf``.
    feasibility_tol : float
        Absolute tolerance for the constraint violation. Default is ``1e-8``.
    initial_tr_radius : float
        Initial trust-region radius. Typically, this value should be on the
        order of one tenth of the greatest expected change to the variables.
        Default is ``1.0``.
    final_tr_radius : float
        Final trust-region radius. It should indicate the accuracy required in
        the final values of the variables. If provided, this option overrides
        the value of `tol` in the `minimize` function. Default is ``1e-6``.
    scale : bool
        Set to True to scale the variables according to the bounds. If True and
        if all the lower and upper bounds are finite, the variables are scaled
        to be within the range :math:`[-1, 1]`. If any of the lower or upper
        bounds is infinite, the variables are not scaled. Default is ``False``.

    References
    ----------
    .. [1] COBYQA
           https://www.cobyqa.com/stable/
    """
    from .._lib.cobyqa import minimize  # import here to avoid circular imports

    _check_unknown_options(unknown_options)
    options = {
        'disp': bool(disp),
        'maxfev': int(maxfev) if maxfev is not None else 500 * len(x0),
        'maxiter': int(maxiter) if maxiter is not None else 1000 * len(x0),
        'target': float(f_target),
        'feasibility_tol': float(feasibility_tol),
        'radius_init': float(initial_tr_radius),
        'radius_final': float(final_tr_radius),
        'scale': bool(scale),
    }
    with COBYQA_LOCK:
        return minimize(fun, x0, args, bounds, constraints, callback, options)
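
# Illustrative sketch (not part of the original module): COBYQA is reached
# through `scipy.optimize.minimize` with method='COBYQA' (SciPy >= 1.14).
# The module-level COBYQA_LOCK above serializes concurrent calls into the
# vendored solver. The objective and bounds below are made-up examples.
if __name__ == "__main__":
    from scipy.optimize import minimize, Bounds

    res = minimize(
        lambda x: np.sum((x - 0.7) ** 2),   # made-up objective
        x0=np.zeros(3),
        method='COBYQA',
        bounds=Bounds(0.0, 1.0),
        options={'f_target': 1e-12, 'scale': True},
    )
    print(res.x, res.fun)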

598
venv/lib/python3.13/site-packages/scipy/optimize/_constraints.py
Normal file

@@ -0,0 +1,598 @@
"""Constraints definition for minimize."""
|
||||
from warnings import warn, catch_warnings, simplefilter, filterwarnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ._differentiable_functions import (
|
||||
VectorFunction, LinearVectorFunction, IdentityVectorFunction
|
||||
)
|
||||
from ._hessian_update_strategy import BFGS
|
||||
from ._optimize import OptimizeWarning
|
||||
|
||||
from scipy._lib._sparse import issparse
|
||||
|
||||
|
||||
def _arr_to_scalar(x):
|
||||
# If x is a numpy array, return x.item(). This will
|
||||
# fail if the array has more than one element.
|
||||
return x.item() if isinstance(x, np.ndarray) else x
|
||||
|
||||
|
||||
class NonlinearConstraint:
|
||||
"""Nonlinear constraint on the variables.
|
||||
|
||||
The constraint has the general inequality form::
|
||||
|
||||
lb <= fun(x) <= ub
|
||||
|
||||
Here the vector of independent variables x is passed as ndarray of shape
|
||||
(n,) and ``fun`` returns a vector with m components.
|
||||
|
||||
It is possible to use equal bounds to represent an equality constraint or
|
||||
infinite bounds to represent a one-sided constraint.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fun : callable
|
||||
The function defining the constraint.
|
||||
The signature is ``fun(x) -> array_like, shape (m,)``.
|
||||
lb, ub : array_like
|
||||
Lower and upper bounds on the constraint. Each array must have the
|
||||
shape (m,) or be a scalar, in the latter case a bound will be the same
|
||||
for all components of the constraint. Use ``np.inf`` with an
|
||||
appropriate sign to specify a one-sided constraint.
|
||||
Set components of `lb` and `ub` equal to represent an equality
|
||||
constraint. Note that you can mix constraints of different types:
|
||||
interval, one-sided or equality, by setting different components of
|
||||
`lb` and `ub` as necessary.
|
||||
jac : {callable, '2-point', '3-point', 'cs'}, optional
|
||||
Method of computing the Jacobian matrix (an m-by-n matrix,
|
||||
where element (i, j) is the partial derivative of f[i] with
|
||||
respect to x[j]). The keywords {'2-point', '3-point',
|
||||
'cs'} select a finite difference scheme for the numerical estimation.
|
||||
A callable must have the following signature::
|
||||
|
||||
jac(x) -> {ndarray, sparse array}, shape (m, n)
|
||||
|
||||
Default is '2-point'.
|
||||
hess : {callable, '2-point', '3-point', 'cs', HessianUpdateStrategy, None}, optional
|
||||
Method for computing the Hessian matrix. The keywords
|
||||
{'2-point', '3-point', 'cs'} select a finite difference scheme for
|
||||
numerical estimation. Alternatively, objects implementing
|
||||
`HessianUpdateStrategy` interface can be used to approximate the
|
||||
Hessian. Currently available implementations are:
|
||||
|
||||
- `BFGS` (default option)
|
||||
- `SR1`
|
||||
|
||||
A callable must return the Hessian matrix of ``dot(fun, v)`` and
|
||||
must have the following signature:
|
||||
``hess(x, v) -> {LinearOperator, sparse array, array_like}, shape (n, n)``.
|
||||
Here ``v`` is ndarray with shape (m,) containing Lagrange multipliers.
|
||||
keep_feasible : array_like of bool, optional
|
||||
Whether to keep the constraint components feasible throughout
|
||||
iterations. A single value set this property for all components.
|
||||
Default is False. Has no effect for equality constraints.
|
||||
finite_diff_rel_step: None or array_like, optional
|
||||
Relative step size for the finite difference approximation. Default is
|
||||
None, which will select a reasonable value automatically depending
|
||||
on a finite difference scheme.
|
||||
finite_diff_jac_sparsity: {None, array_like, sparse array}, optional
|
||||
Defines the sparsity structure of the Jacobian matrix for finite
|
||||
difference estimation, its shape must be (m, n). If the Jacobian has
|
||||
only few non-zero elements in *each* row, providing the sparsity
|
||||
structure will greatly speed up the computations. A zero entry means
|
||||
that a corresponding element in the Jacobian is identically zero.
|
||||
If provided, forces the use of 'lsmr' trust-region solver.
|
||||
If None (default) then dense differencing will be used.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Finite difference schemes {'2-point', '3-point', 'cs'} may be used for
|
||||
approximating either the Jacobian or the Hessian. We, however, do not allow
|
||||
its use for approximating both simultaneously. Hence whenever the Jacobian
|
||||
is estimated via finite-differences, we require the Hessian to be estimated
|
||||
using one of the quasi-Newton strategies.
|
||||
|
||||
The scheme 'cs' is potentially the most accurate, but requires the function
|
||||
to correctly handles complex inputs and be analytically continuable to the
|
||||
complex plane. The scheme '3-point' is more accurate than '2-point' but
|
||||
requires twice as many operations.
|
||||
|
||||
Examples
|
||||
--------
|
||||
Constrain ``x[0] < sin(x[1]) + 1.9``
|
||||
|
||||
>>> from scipy.optimize import NonlinearConstraint
|
||||
>>> import numpy as np
|
||||
>>> con = lambda x: x[0] - np.sin(x[1])
|
||||
>>> nlc = NonlinearConstraint(con, -np.inf, 1.9)
|
||||
|
||||
"""
|
||||
def __init__(self, fun, lb, ub, jac='2-point', hess=None,
|
||||
keep_feasible=False, finite_diff_rel_step=None,
|
||||
finite_diff_jac_sparsity=None):
|
||||
if hess is None:
|
||||
hess = BFGS()
|
||||
self.fun = fun
|
||||
self.lb = lb
|
||||
self.ub = ub
|
||||
self.finite_diff_rel_step = finite_diff_rel_step
|
||||
self.finite_diff_jac_sparsity = finite_diff_jac_sparsity
|
||||
self.jac = jac
|
||||
self.hess = hess
|
||||
self.keep_feasible = keep_feasible
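
# Illustrative sketch (not part of the original module): the same constraint
# as in the class docstring, but with an analytic Jacobian supplied as a
# callable with the documented ``jac(x) -> array, shape (m, n)`` signature.
if __name__ == "__main__":
    def con(x):
        return np.array([x[0] - np.sin(x[1])])      # m = 1 component

    def con_jac(x):
        return np.array([[1.0, -np.cos(x[1])]])     # shape (1, 2)

    nlc = NonlinearConstraint(con, -np.inf, 1.9, jac=con_jac)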


class LinearConstraint:
    """Linear constraint on the variables.

    The constraint has the general inequality form::

        lb <= A.dot(x) <= ub

    Here the vector of independent variables x is passed as ndarray of shape
    (n,) and the matrix A has shape (m, n).

    It is possible to use equal bounds to represent an equality constraint or
    infinite bounds to represent a one-sided constraint.

    Parameters
    ----------
    A : {array_like, sparse array}, shape (m, n)
        Matrix defining the constraint.
    lb, ub : dense array_like, optional
        Lower and upper limits on the constraint. Each array must have the
        shape (m,) or be a scalar; in the latter case a bound will be the same
        for all components of the constraint. Use ``np.inf`` with an
        appropriate sign to specify a one-sided constraint.
        Set components of `lb` and `ub` equal to represent an equality
        constraint. Note that you can mix constraints of different types:
        interval, one-sided or equality, by setting different components of
        `lb` and `ub` as necessary. Defaults to ``lb = -np.inf``
        and ``ub = np.inf`` (no limits).
    keep_feasible : dense array_like of bool, optional
        Whether to keep the constraint components feasible throughout
        iterations. A single value sets this property for all components.
        Default is False. Has no effect for equality constraints.
    """
    def _input_validation(self):
        if self.A.ndim != 2:
            message = "`A` must have exactly two dimensions."
            raise ValueError(message)

        try:
            shape = self.A.shape[0:1]
            self.lb = np.broadcast_to(self.lb, shape)
            self.ub = np.broadcast_to(self.ub, shape)
            self.keep_feasible = np.broadcast_to(self.keep_feasible, shape)
        except ValueError:
            message = ("`lb`, `ub`, and `keep_feasible` must be broadcastable "
                       "to shape `A.shape[0:1]`")
            raise ValueError(message)

    def __init__(self, A, lb=-np.inf, ub=np.inf, keep_feasible=False):
        if not issparse(A):
            # In some cases, if the constraint is not valid, this emits a
            # VisibleDeprecationWarning about ragged nested sequences
            # before eventually causing an error. `scipy.optimize.milp` would
            # prefer that this just error out immediately so it can handle it
            # rather than concerning the user.
            with catch_warnings():
                simplefilter("error")
                self.A = np.atleast_2d(A).astype(np.float64)
        else:
            self.A = A
        if issparse(lb) or issparse(ub):
            raise ValueError("Constraint limits must be dense arrays.")
        self.lb = np.atleast_1d(lb).astype(np.float64)
        self.ub = np.atleast_1d(ub).astype(np.float64)

        if issparse(keep_feasible):
            raise ValueError("`keep_feasible` must be a dense array.")
        self.keep_feasible = np.atleast_1d(keep_feasible).astype(bool)
        self._input_validation()

    def residual(self, x):
        """
        Calculate the residual between the constraint function and the limits

        For a linear constraint of the form::

            lb <= A@x <= ub

        the lower and upper residuals between ``A@x`` and the limits are
        values ``sl`` and ``sb`` such that::

            lb + sl == A@x == ub - sb

        When all elements of ``sl`` and ``sb`` are positive, all elements of
        the constraint are satisfied; a negative element in ``sl`` or ``sb``
        indicates that the corresponding element of the constraint is not
        satisfied.

        Parameters
        ----------
        x : array_like
            Vector of independent variables

        Returns
        -------
        sl, sb : array-like
            The lower and upper residuals
        """
        return self.A@x - self.lb, self.ub - self.A@x
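
# Illustrative sketch (not part of the original module): residuals for a
# two-row linear constraint. Positive entries in both outputs mean the
# corresponding rows of ``lb <= A @ x <= ub`` are satisfied.
if __name__ == "__main__":
    A = np.array([[1.0, 1.0],
                  [1.0, -1.0]])
    lc = LinearConstraint(A, lb=[0.0, -1.0], ub=[2.0, 1.0])
    sl, sb = lc.residual([0.5, 0.5])    # A @ x == [1.0, 0.0]
    print(sl, sb)   # sl == [1.0, 1.0], sb == [1.0, 1.0] -> feasible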


class Bounds:
    """Bounds constraint on the variables.

    The constraint has the general inequality form::

        lb <= x <= ub

    It is possible to use equal bounds to represent an equality constraint or
    infinite bounds to represent a one-sided constraint.

    Parameters
    ----------
    lb, ub : dense array_like, optional
        Lower and upper bounds on independent variables. `lb`, `ub`, and
        `keep_feasible` must be the same shape or broadcastable.
        Set components of `lb` and `ub` equal
        to fix a variable. Use ``np.inf`` with an appropriate sign to disable
        bounds on all or some variables. Note that you can mix constraints of
        different types: interval, one-sided or equality, by setting different
        components of `lb` and `ub` as necessary. Defaults to ``lb = -np.inf``
        and ``ub = np.inf`` (no bounds).
    keep_feasible : dense array_like of bool, optional
        Whether to keep the constraint components feasible throughout
        iterations. Must be broadcastable with `lb` and `ub`.
        Default is False. Has no effect for equality constraints.
    """
    def _input_validation(self):
        try:
            res = np.broadcast_arrays(self.lb, self.ub, self.keep_feasible)
            self.lb, self.ub, self.keep_feasible = res
        except ValueError:
            message = "`lb`, `ub`, and `keep_feasible` must be broadcastable."
            raise ValueError(message)

    def __init__(self, lb=-np.inf, ub=np.inf, keep_feasible=False):
        if issparse(lb) or issparse(ub):
            raise ValueError("Lower and upper bounds must be dense arrays.")
        self.lb = np.atleast_1d(lb)
        self.ub = np.atleast_1d(ub)

        if issparse(keep_feasible):
            raise ValueError("`keep_feasible` must be a dense array.")
        self.keep_feasible = np.atleast_1d(keep_feasible).astype(bool)
        self._input_validation()

    def __repr__(self):
        start = f"{type(self).__name__}({self.lb!r}, {self.ub!r}"
        if np.any(self.keep_feasible):
            end = f", keep_feasible={self.keep_feasible!r})"
        else:
            end = ")"
        return start + end

    def residual(self, x):
        """Calculate the residual (slack) between the input and the bounds

        For a bound constraint of the form::

            lb <= x <= ub

        the lower and upper residuals between `x` and the bounds are values
        ``sl`` and ``sb`` such that::

            lb + sl == x == ub - sb

        When all elements of ``sl`` and ``sb`` are positive, all elements of
        ``x`` lie within the bounds; a negative element in ``sl`` or ``sb``
        indicates that the corresponding element of ``x`` is out of bounds.

        Parameters
        ----------
        x : array_like
            Vector of independent variables

        Returns
        -------
        sl, sb : array-like
            The lower and upper residuals
        """
        return x - self.lb, self.ub - x
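
# Illustrative sketch (not part of the original module): a negative entry in
# either residual pinpoints which variable is out of bounds.
if __name__ == "__main__":
    b = Bounds(lb=[0.0, 0.0], ub=[1.0, 1.0])
    sl, sb = b.residual([0.5, 1.25])
    print(sl, sb)   # sl == [0.5, 1.25], sb == [0.5, -0.25]: x[1] exceeds ub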


class PreparedConstraint:
    """Constraint prepared from a user defined constraint.

    On creation it will check whether a constraint definition is valid and
    the initial point is feasible. If created successfully, it will contain
    the attributes listed below.

    Parameters
    ----------
    constraint : {NonlinearConstraint, LinearConstraint, Bounds}
        Constraint to check and prepare.
    x0 : array_like
        Initial vector of independent variables.
    sparse_jacobian : bool or None, optional
        If bool, then the Jacobian of the constraint will be converted
        to the corresponding format if necessary. If None (default), such
        conversion is not made.
    finite_diff_bounds : 2-tuple, optional
        Lower and upper bounds on the independent variables for the finite
        difference approximation, if applicable. Defaults to no bounds.

    Attributes
    ----------
    fun : {VectorFunction, LinearVectorFunction, IdentityVectorFunction}
        Function defining the constraint wrapped by one of the convenience
        classes.
    bounds : 2-tuple
        Contains lower and upper bounds for the constraints --- lb and ub.
        These are converted to ndarray and have a size equal to the number of
        the constraints.
    keep_feasible : ndarray
        Array indicating which components must be kept feasible with a size
        equal to the number of the constraints.
    """
    def __init__(self, constraint, x0, sparse_jacobian=None,
                 finite_diff_bounds=(-np.inf, np.inf)):
        if isinstance(constraint, NonlinearConstraint):
            fun = VectorFunction(constraint.fun, x0,
                                 constraint.jac, constraint.hess,
                                 constraint.finite_diff_rel_step,
                                 constraint.finite_diff_jac_sparsity,
                                 finite_diff_bounds, sparse_jacobian)
        elif isinstance(constraint, LinearConstraint):
            fun = LinearVectorFunction(constraint.A, x0, sparse_jacobian)
        elif isinstance(constraint, Bounds):
            fun = IdentityVectorFunction(x0, sparse_jacobian)
        else:
            raise ValueError("`constraint` of an unknown type is passed.")

        m = fun.m

        lb = np.asarray(constraint.lb, dtype=float)
        ub = np.asarray(constraint.ub, dtype=float)
        keep_feasible = np.asarray(constraint.keep_feasible, dtype=bool)

        lb = np.broadcast_to(lb, m)
        ub = np.broadcast_to(ub, m)
        keep_feasible = np.broadcast_to(keep_feasible, m)

        if keep_feasible.shape != (m,):
            raise ValueError("`keep_feasible` has a wrong shape.")

        mask = keep_feasible & (lb != ub)
        f0 = fun.f
        if np.any(f0[mask] < lb[mask]) or np.any(f0[mask] > ub[mask]):
            raise ValueError("`x0` is infeasible with respect to some "
                             "inequality constraint with `keep_feasible` "
                             "set to True.")

        self.fun = fun
        self.bounds = (lb, ub)
        self.keep_feasible = keep_feasible

    def violation(self, x):
        """How much the constraint is exceeded by.

        Parameters
        ----------
        x : array-like
            Vector of independent variables

        Returns
        -------
        excess : array-like
            How much the constraint is exceeded by, for each of the
            constraints specified by `PreparedConstraint.fun`.
        """
        with catch_warnings():
            # Ignore the following warning; it's not important when
            # figuring out total violation:
            # UserWarning: delta_grad == 0.0. Check if the approximated
            # function is linear
            filterwarnings("ignore", "delta_grad", UserWarning)
            ev = self.fun.fun(np.asarray(x))

        excess_lb = np.maximum(self.bounds[0] - ev, 0)
        excess_ub = np.maximum(ev - self.bounds[1], 0)

        return excess_lb + excess_ub
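
# Illustrative sketch (not part of the original module): `violation` folds
# the lower and upper excesses into a single nonnegative vector, one entry
# per constraint component.
if __name__ == "__main__":
    lc = LinearConstraint(np.eye(2), lb=0.0, ub=1.0)
    pc = PreparedConstraint(lc, x0=np.array([0.5, 0.5]))
    print(pc.violation([0.5, 1.5]))   # [0.0, 0.5]: component 1 exceeds ub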


def new_bounds_to_old(lb, ub, n):
    """Convert the new bounds representation to the old one.

    The new representation is a tuple (lb, ub) and the old one is a list
    containing n tuples, the ith containing the lower and upper bound on the
    ith variable.
    If any of the entries in lb/ub are -np.inf/np.inf they are replaced by
    None.
    """
    lb = np.broadcast_to(lb, n)
    ub = np.broadcast_to(ub, n)

    lb = [float(x) if x > -np.inf else None for x in lb]
    ub = [float(x) if x < np.inf else None for x in ub]

    return list(zip(lb, ub))


def old_bound_to_new(bounds):
    """Convert the old bounds representation to the new one.

    The new representation is a tuple (lb, ub) and the old one is a list
    containing n tuples, the ith containing the lower and upper bound on the
    ith variable.
    If any of the entries in lb/ub are None they are replaced by
    -np.inf/np.inf.
    """
    lb, ub = zip(*bounds)

    # Convert occurrences of None to -inf or inf, and replace occurrences of
    # any numpy array x with x.item(). Then wrap the results in numpy arrays.
    lb = np.array([float(_arr_to_scalar(x)) if x is not None else -np.inf
                   for x in lb])
    ub = np.array([float(_arr_to_scalar(x)) if x is not None else np.inf
                   for x in ub])

    return lb, ub
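
# Illustrative sketch (not part of the original module): the two helpers are
# inverses of each other, modulo the None <-> +/-inf encoding.
if __name__ == "__main__":
    old = new_bounds_to_old(lb=[0.0, -np.inf], ub=np.inf, n=2)
    print(old)                      # [(0.0, None), (None, None)]
    print(old_bound_to_new(old))    # (array([  0., -inf]), array([inf, inf]))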


def strict_bounds(lb, ub, keep_feasible, n_vars):
    """Remove bounds which are not asked to be kept feasible."""
    strict_lb = np.resize(lb, n_vars).astype(float)
    strict_ub = np.resize(ub, n_vars).astype(float)
    keep_feasible = np.resize(keep_feasible, n_vars)
    strict_lb[~keep_feasible] = -np.inf
    strict_ub[~keep_feasible] = np.inf
    return strict_lb, strict_ub
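
# Illustrative sketch (not part of the original module): bounds whose
# components are not flagged in `keep_feasible` are widened to +/-inf, so
# only the flagged ones constrain intermediate iterates.
if __name__ == "__main__":
    print(strict_bounds(lb=[0.0, 0.0], ub=[1.0, 1.0],
                        keep_feasible=[True, False], n_vars=2))
    # -> (array([  0., -inf]), array([ 1., inf]))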


def new_constraint_to_old(con, x0):
    """
    Converts new-style constraint objects to old-style constraint dictionaries.
    """
    if isinstance(con, NonlinearConstraint):
        if (con.finite_diff_jac_sparsity is not None or
                con.finite_diff_rel_step is not None or
                not isinstance(con.hess, BFGS) or  # misses user specified BFGS
                con.keep_feasible):
            warn("Constraint options `finite_diff_jac_sparsity`, "
                 "`finite_diff_rel_step`, `keep_feasible`, and `hess` "
                 "are ignored by this method.",
                 OptimizeWarning, stacklevel=3)

        fun = con.fun
        if callable(con.jac):
            jac = con.jac
        else:
            jac = None

    else:  # LinearConstraint
        if np.any(con.keep_feasible):
            warn("Constraint option `keep_feasible` is ignored by this "
                 "method.", OptimizeWarning, stacklevel=3)

        A = con.A
        if issparse(A):
            A = A.toarray()

        def fun(x):
            return np.dot(A, x)

        def jac(x):
            return A

    # FIXME: when bugs in VectorFunction/LinearVectorFunction are worked out,
    # use pcon.fun.fun and pcon.fun.jac. Until then, get fun/jac above.
    pcon = PreparedConstraint(con, x0)
    lb, ub = pcon.bounds

    i_eq = lb == ub
    i_bound_below = np.logical_xor(lb != -np.inf, i_eq)
    i_bound_above = np.logical_xor(ub != np.inf, i_eq)
    i_unbounded = np.logical_and(lb == -np.inf, ub == np.inf)

    if np.any(i_unbounded):
        warn("At least one constraint is unbounded above and below. Such "
             "constraints are ignored.",
             OptimizeWarning, stacklevel=3)

    ceq = []
    if np.any(i_eq):
        def f_eq(x):
            y = np.array(fun(x)).flatten()
            return y[i_eq] - lb[i_eq]
        ceq = [{"type": "eq", "fun": f_eq}]

        if jac is not None:
            def j_eq(x):
                dy = jac(x)
                if issparse(dy):
                    dy = dy.toarray()
                dy = np.atleast_2d(dy)
                return dy[i_eq, :]
            ceq[0]["jac"] = j_eq

    cineq = []
    n_bound_below = np.sum(i_bound_below)
    n_bound_above = np.sum(i_bound_above)
    if n_bound_below + n_bound_above:
        def f_ineq(x):
            y = np.zeros(n_bound_below + n_bound_above)
            y_all = np.array(fun(x)).flatten()
            y[:n_bound_below] = y_all[i_bound_below] - lb[i_bound_below]
            y[n_bound_below:] = -(y_all[i_bound_above] - ub[i_bound_above])
            return y
        cineq = [{"type": "ineq", "fun": f_ineq}]

        if jac is not None:
            def j_ineq(x):
                dy = np.zeros((n_bound_below + n_bound_above, len(x0)))
                dy_all = jac(x)
                if issparse(dy_all):
                    dy_all = dy_all.toarray()
                dy_all = np.atleast_2d(dy_all)
                dy[:n_bound_below, :] = dy_all[i_bound_below]
                dy[n_bound_below:, :] = -dy_all[i_bound_above]
                return dy
            cineq[0]["jac"] = j_ineq

    old_constraints = ceq + cineq

    if len(old_constraints) > 1:
        warn("Equality and inequality constraints are specified in the same "
             "element of the constraint list. For efficient use with this "
             "method, equality and inequality constraints should be specified "
             "in separate elements of the constraint list.",
             OptimizeWarning, stacklevel=3)
    return old_constraints


def old_constraint_to_new(ic, con):
    """
    Converts old-style constraint dictionaries to new-style constraint objects.
    """
    # check type
    try:
        ctype = con['type'].lower()
    except KeyError as e:
        raise KeyError(f'Constraint {ic} has no type defined.') from e
    except TypeError as e:
        raise TypeError(
            'Constraints must be a sequence of dictionaries.'
        ) from e
    except AttributeError as e:
        raise TypeError("Constraint's type must be a string.") from e
    else:
        if ctype not in ['eq', 'ineq']:
            raise ValueError(f"Unknown constraint type '{con['type']}'.")
    if 'fun' not in con:
        raise ValueError(f'Constraint {ic} has no function defined.')

    lb = 0
    if ctype == 'eq':
        ub = 0
    else:
        ub = np.inf

    jac = '2-point'
    if 'args' in con:
        args = con['args']

        def fun(x):
            return con["fun"](x, *args)

        if 'jac' in con:
            def jac(x):
                return con["jac"](x, *args)
    else:
        fun = con['fun']
        if 'jac' in con:
            jac = con['jac']

    return NonlinearConstraint(fun, lb, ub, jac)
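
# Illustrative sketch (not part of the original module): an old-style 'ineq'
# dict ``fun(x) >= 0`` maps onto the new form ``0 <= fun(x) <= inf``.
if __name__ == "__main__":
    con = {'type': 'ineq', 'fun': lambda x: x[0] + x[1] - 1.0}
    nlc = old_constraint_to_new(0, con)
    print(nlc.lb, nlc.ub)   # 0 inf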

728
venv/lib/python3.13/site-packages/scipy/optimize/_dcsrch.py
Normal file

@@ -0,0 +1,728 @@
import numpy as np

"""
# 2023 - ported from minpack2.dcsrch, dcstep (Fortran) to Python
c     MINPACK-1 Project. June 1983.
c     Argonne National Laboratory.
c     Jorge J. More' and David J. Thuente.
c
c     MINPACK-2 Project. November 1993.
c     Argonne National Laboratory and University of Minnesota.
c     Brett M. Averick, Richard G. Carter, and Jorge J. More'.
"""

# NOTE this file was linted by black on first commit, and can be kept that way.


class DCSRCH:
    """
    Parameters
    ----------
    phi : callable phi(alpha)
        Function at point `alpha`
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    ftol : float
        A nonnegative tolerance for the sufficient decrease condition.
    gtol : float
        A nonnegative tolerance for the curvature condition.
    xtol : float
        A nonnegative relative tolerance for an acceptable step. The
        subroutine exits with a warning if the relative difference between
        sty and stx is less than xtol.
    stpmin : float
        A nonnegative lower bound for the step.
    stpmax : float
        A nonnegative upper bound for the step.

    Notes
    -----
    This subroutine finds a step that satisfies a sufficient
    decrease condition and a curvature condition.

    Each call of the subroutine updates an interval with
    endpoints stx and sty. The interval is initially chosen
    so that it contains a minimizer of the modified function

        psi(stp) = f(stp) - f(0) - ftol*stp*f'(0).

    If psi(stp) <= 0 and f'(stp) >= 0 for some step, then the
    interval is chosen so that it contains a minimizer of f.

    The algorithm is designed to find a step that satisfies
    the sufficient decrease condition

        f(stp) <= f(0) + ftol*stp*f'(0),

    and the curvature condition

        abs(f'(stp)) <= gtol*abs(f'(0)).

    If ftol is less than gtol and if, for example, the function
    is bounded below, then there is always a step which satisfies
    both conditions.

    If no step can be found that satisfies both conditions, then
    the algorithm stops with a warning. In this case stp only
    satisfies the sufficient decrease condition.

    A typical invocation of dcsrch has the following outline:

        Evaluate the function at stp = 0.0d0; store in f.
        Evaluate the gradient at stp = 0.0d0; store in g.
        Choose a starting step stp.

        task = 'START'
     10 continue
        call dcsrch(stp,f,g,ftol,gtol,xtol,task,stpmin,stpmax,
                    isave,dsave)
        if (task .eq. 'FG') then
           Evaluate the function and the gradient at stp
           go to 10
        end if

    NOTE: The user must not alter work arrays between calls.

    The subroutine statement is

        subroutine dcsrch(f,g,stp,ftol,gtol,xtol,stpmin,stpmax,
                          task,isave,dsave)

    where

    stp is a double precision variable.
        On entry stp is the current estimate of a satisfactory
        step. On initial entry, a positive initial estimate
        must be provided.
        On exit stp is the current estimate of a satisfactory step
        if task = 'FG'. If task = 'CONV' then stp satisfies
        the sufficient decrease and curvature condition.

    f is a double precision variable.
        On initial entry f is the value of the function at 0.
        On subsequent entries f is the value of the
        function at stp.
        On exit f is the value of the function at stp.

    g is a double precision variable.
        On initial entry g is the derivative of the function at 0.
        On subsequent entries g is the derivative of the
        function at stp.
        On exit g is the derivative of the function at stp.

    ftol is a double precision variable.
        On entry ftol specifies a nonnegative tolerance for the
        sufficient decrease condition.
        On exit ftol is unchanged.

    gtol is a double precision variable.
        On entry gtol specifies a nonnegative tolerance for the
        curvature condition.
        On exit gtol is unchanged.

    xtol is a double precision variable.
        On entry xtol specifies a nonnegative relative tolerance
        for an acceptable step. The subroutine exits with a
        warning if the relative difference between sty and stx
        is less than xtol.
        On exit xtol is unchanged.

    task is a character variable of length at least 60.
        On initial entry task must be set to 'START'.
        On exit task indicates the required action:

        If task(1:2) = 'FG' then evaluate the function and
        derivative at stp and call dcsrch again.

        If task(1:4) = 'CONV' then the search is successful.

        If task(1:4) = 'WARN' then the subroutine is not able
        to satisfy the convergence conditions. The exit value of
        stp contains the best point found during the search.

        If task(1:5) = 'ERROR' then there is an error in the
        input arguments.

        On exit with convergence, a warning or an error, the
        variable task contains additional information.

    stpmin is a double precision variable.
        On entry stpmin is a nonnegative lower bound for the step.
        On exit stpmin is unchanged.

    stpmax is a double precision variable.
        On entry stpmax is a nonnegative upper bound for the step.
        On exit stpmax is unchanged.

    isave is an integer work array of dimension 2.

    dsave is a double precision work array of dimension 13.

    Subprograms called

        MINPACK-2 ... dcstep

    MINPACK-1 Project. June 1983.
    Argonne National Laboratory.
    Jorge J. More' and David J. Thuente.

    MINPACK-2 Project. November 1993.
    Argonne National Laboratory and University of Minnesota.
    Brett M. Averick, Richard G. Carter, and Jorge J. More'.
    """

    def __init__(self, phi, derphi, ftol, gtol, xtol, stpmin, stpmax):
        self.stage = None
        self.ginit = None
        self.gtest = None
        self.gx = None
        self.gy = None
        self.finit = None
        self.fx = None
        self.fy = None
        self.stx = None
        self.sty = None
        self.stmin = None
        self.stmax = None
        self.width = None
        self.width1 = None

        # leave all assessment of tolerances/limits to the first call of
        # this object
        self.ftol = ftol
        self.gtol = gtol
        self.xtol = xtol
        self.stpmin = stpmin
        self.stpmax = stpmax

        self.phi = phi
        self.derphi = derphi

    def __call__(self, alpha1, phi0=None, derphi0=None, maxiter=100):
        """
        Parameters
        ----------
        alpha1 : float
            alpha1 is the current estimate of a satisfactory
            step. A positive initial estimate must be provided.
        phi0 : float
            the value of `phi` at 0 (if known).
        derphi0 : float
            the derivative of `phi` at 0 (if known).
        maxiter : int
            Maximum number of iterations to perform.

        Returns
        -------
        alpha : float
            Step size, or None if no suitable step was found.
        phi : float
            Value of `phi` at the new point `alpha`.
        phi0 : float
            Value of `phi` at `alpha=0`.
        task : bytes
            On exit task indicates status information.

            If task[:4] == b'CONV' then the search is successful.

            If task[:4] == b'WARN' then the subroutine is not able
            to satisfy the convergence conditions. The exit value of
            stp contains the best point found during the search.

            If task[:5] == b'ERROR' then there is an error in the
            input arguments.
        """
        if phi0 is None:
            phi0 = self.phi(0.0)
        if derphi0 is None:
            derphi0 = self.derphi(0.0)

        phi1 = phi0
        derphi1 = derphi0

        task = b"START"
        for i in range(maxiter):
            stp, phi1, derphi1, task = self._iterate(
                alpha1, phi1, derphi1, task
            )

            if not np.isfinite(stp):
                task = b"WARN"
                stp = None
                break

            if task[:2] == b"FG":
                alpha1 = stp
                phi1 = self.phi(stp)
                derphi1 = self.derphi(stp)
            else:
                break
        else:
            # maxiter reached, the line search did not converge
            stp = None
            task = b"WARNING: dcsrch did not converge within max iterations"

        if task[:5] == b"ERROR" or task[:4] == b"WARN":
            stp = None  # failed

        return stp, phi1, phi0, task

    def _iterate(self, stp, f, g, task):
        """
        Parameters
        ----------
        stp : float
            The current estimate of a satisfactory step. On initial entry, a
            positive initial estimate must be provided.
        f : float
            On first call f is the value of the function at 0. On subsequent
            entries f should be the value of the function at stp.
        g : float
            On initial entry g is the derivative of the function at 0. On
            subsequent entries g is the derivative of the function at stp.
        task : bytes
            On initial entry task must be set to 'START'.

            On exit with convergence, a warning or an error, the
            variable task contains additional information.

        Returns
        -------
        stp, f, g, task : tuple

            stp : float
                the current estimate of a satisfactory step if task = 'FG'. If
                task = 'CONV' then stp satisfies the sufficient decrease and
                curvature condition.
            f : float
                the value of the function at stp.
            g : float
                the derivative of the function at stp.
            task : bytes
                On exit task indicates the required action:

                If task(1:2) == b'FG' then evaluate the function and
                derivative at stp and call dcsrch again.

                If task(1:4) == b'CONV' then the search is successful.

                If task(1:4) == b'WARN' then the subroutine is not able
                to satisfy the convergence conditions. The exit value of
                stp contains the best point found during the search.

                If task(1:5) == b'ERROR' then there is an error in the
                input arguments.
        """
        p5 = 0.5
        p66 = 0.66
        xtrapl = 1.1
        xtrapu = 4.0

        if task[:5] == b"START":
            if stp < self.stpmin:
                task = b"ERROR: STP .LT. STPMIN"
            if stp > self.stpmax:
                task = b"ERROR: STP .GT. STPMAX"
            if g >= 0:
                task = b"ERROR: INITIAL G .GE. ZERO"
            if self.ftol < 0:
                task = b"ERROR: FTOL .LT. ZERO"
            if self.gtol < 0:
                task = b"ERROR: GTOL .LT. ZERO"
            if self.xtol < 0:
                task = b"ERROR: XTOL .LT. ZERO"
            if self.stpmin < 0:
                task = b"ERROR: STPMIN .LT. ZERO"
            if self.stpmax < self.stpmin:
                task = b"ERROR: STPMAX .LT. STPMIN"

            if task[:5] == b"ERROR":
                return stp, f, g, task

            # Initialize local variables.

            self.brackt = False
            self.stage = 1
            self.finit = f
            self.ginit = g
            self.gtest = self.ftol * self.ginit
            self.width = self.stpmax - self.stpmin
            self.width1 = self.width / p5

            # The variables stx, fx, gx contain the values of the step,
            # function, and derivative at the best step.
            # The variables sty, fy, gy contain the values of the step,
            # function, and derivative at sty.
            # The variables stp, f, g contain the values of the step,
            # function, and derivative at stp.

            self.stx = 0.0
            self.fx = self.finit
            self.gx = self.ginit
            self.sty = 0.0
            self.fy = self.finit
            self.gy = self.ginit
            self.stmin = 0
            self.stmax = stp + xtrapu * stp
            task = b"FG"
            return stp, f, g, task

        # In the original Fortran this was a location to restore variables;
        # we don't need to do that because they're attributes.

        # If psi(stp) <= 0 and f'(stp) >= 0 for some step, then the
        # algorithm enters the second stage.
        ftest = self.finit + stp * self.gtest

        if self.stage == 1 and f <= ftest and g >= 0:
            self.stage = 2

        # test for warnings
        if self.brackt and (stp <= self.stmin or stp >= self.stmax):
            task = b"WARNING: ROUNDING ERRORS PREVENT PROGRESS"
        if self.brackt and self.stmax - self.stmin <= self.xtol * self.stmax:
            task = b"WARNING: XTOL TEST SATISFIED"
        if stp == self.stpmax and f <= ftest and g <= self.gtest:
            task = b"WARNING: STP = STPMAX"
        if stp == self.stpmin and (f > ftest or g >= self.gtest):
            task = b"WARNING: STP = STPMIN"

        # test for convergence
        if f <= ftest and abs(g) <= self.gtol * -self.ginit:
            task = b"CONVERGENCE"

        # test for termination
        if task[:4] == b"WARN" or task[:4] == b"CONV":
            return stp, f, g, task

        # A modified function is used to predict the step during the
        # first stage if a lower function value has been obtained but
        # the decrease is not sufficient.
        if self.stage == 1 and f <= self.fx and f > ftest:
            # Define the modified function and derivative values.
            fm = f - stp * self.gtest
            fxm = self.fx - self.stx * self.gtest
            fym = self.fy - self.sty * self.gtest
            gm = g - self.gtest
            gxm = self.gx - self.gtest
            gym = self.gy - self.gtest

            # Call dcstep to update stx, sty, and to compute the new step.
            # dcstep can have several operations which can produce NaN,
            # e.g. inf/inf. Filter these out.
            with np.errstate(invalid="ignore", over="ignore"):
                tup = dcstep(
                    self.stx,
                    fxm,
                    gxm,
                    self.sty,
                    fym,
                    gym,
                    stp,
                    fm,
                    gm,
                    self.brackt,
                    self.stmin,
                    self.stmax,
                )
                self.stx, fxm, gxm, self.sty, fym, gym, stp, self.brackt = tup

            # Reset the function and derivative values for f.
            self.fx = fxm + self.stx * self.gtest
            self.fy = fym + self.sty * self.gtest
            self.gx = gxm + self.gtest
            self.gy = gym + self.gtest

        else:
            # Call dcstep to update stx, sty, and to compute the new step.
            # dcstep can have several operations which can produce NaN,
            # e.g. inf/inf. Filter these out.

            with np.errstate(invalid="ignore", over="ignore"):
                tup = dcstep(
                    self.stx,
                    self.fx,
                    self.gx,
                    self.sty,
                    self.fy,
                    self.gy,
                    stp,
                    f,
                    g,
                    self.brackt,
                    self.stmin,
                    self.stmax,
                )
            (
                self.stx,
                self.fx,
                self.gx,
                self.sty,
                self.fy,
                self.gy,
                stp,
                self.brackt,
            ) = tup

        # Decide if a bisection step is needed.
        if self.brackt:
            if abs(self.sty - self.stx) >= p66 * self.width1:
                stp = self.stx + p5 * (self.sty - self.stx)
            self.width1 = self.width
            self.width = abs(self.sty - self.stx)

        # Set the minimum and maximum steps allowed for stp.
        if self.brackt:
            self.stmin = min(self.stx, self.sty)
            self.stmax = max(self.stx, self.sty)
        else:
            self.stmin = stp + xtrapl * (stp - self.stx)
            self.stmax = stp + xtrapu * (stp - self.stx)

        # Force the step to be within the bounds stpmax and stpmin.
        stp = np.clip(stp, self.stpmin, self.stpmax)

        # If further progress is not possible, let stp be the best
        # point obtained during the search.
        if (
            self.brackt
            and (stp <= self.stmin or stp >= self.stmax)
            or (
                self.brackt
                and self.stmax - self.stmin <= self.xtol * self.stmax
            )
        ):
            stp = self.stx

        # Obtain another function and derivative.
        task = b"FG"
        return stp, f, g, task


def dcstep(stx, fx, dx, sty, fy, dy, stp, fp, dp, brackt, stpmin, stpmax):
    """
    Subroutine dcstep

    This subroutine computes a safeguarded step for a search
    procedure and updates an interval that contains a step that
    satisfies a sufficient decrease and a curvature condition.

    The parameter stx contains the step with the least function
    value. If brackt is set to .true. then a minimizer has
    been bracketed in an interval with endpoints stx and sty.
    The parameter stp contains the current step.
    The subroutine assumes that if brackt is set to .true. then

        min(stx,sty) < stp < max(stx,sty),

    and that the derivative at stx is negative in the direction
    of the step.

    The subroutine statement is

        subroutine dcstep(stx,fx,dx,sty,fy,dy,stp,fp,dp,brackt,
                          stpmin,stpmax)

    where

    stx is a double precision variable.
        On entry stx is the best step obtained so far and is an
        endpoint of the interval that contains the minimizer.
        On exit stx is the updated best step.

    fx is a double precision variable.
        On entry fx is the function at stx.
        On exit fx is the function at stx.

    dx is a double precision variable.
        On entry dx is the derivative of the function at
        stx. The derivative must be negative in the direction of
        the step, that is, dx and stp - stx must have opposite
        signs.
        On exit dx is the derivative of the function at stx.

    sty is a double precision variable.
        On entry sty is the second endpoint of the interval that
        contains the minimizer.
        On exit sty is the updated endpoint of the interval that
        contains the minimizer.

    fy is a double precision variable.
        On entry fy is the function at sty.
        On exit fy is the function at sty.

    dy is a double precision variable.
        On entry dy is the derivative of the function at sty.
        On exit dy is the derivative of the function at the exit sty.

    stp is a double precision variable.
        On entry stp is the current step. If brackt is set to .true.
        then on input stp must be between stx and sty.
        On exit stp is a new trial step.

    fp is a double precision variable.
        On entry fp is the function at stp.
        On exit fp is unchanged.

    dp is a double precision variable.
        On entry dp is the derivative of the function at stp.
        On exit dp is unchanged.

    brackt is a logical variable.
        On entry brackt specifies if a minimizer has been bracketed.
        Initially brackt must be set to .false.
        On exit brackt specifies if a minimizer has been bracketed.
        When a minimizer is bracketed, brackt is set to .true.

    stpmin is a double precision variable.
        On entry stpmin is a lower bound for the step.
        On exit stpmin is unchanged.

    stpmax is a double precision variable.
        On entry stpmax is an upper bound for the step.
        On exit stpmax is unchanged.

    MINPACK-1 Project. June 1983.
    Argonne National Laboratory.
    Jorge J. More' and David J. Thuente.

    MINPACK-2 Project. November 1993.
    Argonne National Laboratory and University of Minnesota.
    Brett M. Averick and Jorge J. More'.
    """
    sgn_dp = np.sign(dp)
    sgn_dx = np.sign(dx)

    # sgnd = dp * (dx / abs(dx))
    sgnd = sgn_dp * sgn_dx

    # First case: A higher function value. The minimum is bracketed.
    # If the cubic step is closer to stx than the quadratic step, the
    # cubic step is taken, otherwise the average of the cubic and
    # quadratic steps is taken.
    if fp > fx:
        theta = 3.0 * (fx - fp) / (stp - stx) + dx + dp
        s = max(abs(theta), abs(dx), abs(dp))
        gamma = s * np.sqrt((theta / s) ** 2 - (dx / s) * (dp / s))
        if stp < stx:
            gamma *= -1
        p = (gamma - dx) + theta
        q = ((gamma - dx) + gamma) + dp
        r = p / q
        stpc = stx + r * (stp - stx)
        stpq = stx + ((dx / ((fx - fp) / (stp - stx) + dx)) / 2.0) * (stp - stx)
        if abs(stpc - stx) <= abs(stpq - stx):
            stpf = stpc
        else:
            stpf = stpc + (stpq - stpc) / 2.0
        brackt = True
    elif sgnd < 0.0:
        # Second case: A lower function value and derivatives of opposite
        # sign. The minimum is bracketed. If the cubic step is farther from
        # stp than the secant step, the cubic step is taken, otherwise the
        # secant step is taken.
        theta = 3 * (fx - fp) / (stp - stx) + dx + dp
        s = max(abs(theta), abs(dx), abs(dp))
        gamma = s * np.sqrt((theta / s) ** 2 - (dx / s) * (dp / s))
        if stp > stx:
            gamma *= -1
        p = (gamma - dp) + theta
        q = ((gamma - dp) + gamma) + dx
        r = p / q
        stpc = stp + r * (stx - stp)
        stpq = stp + (dp / (dp - dx)) * (stx - stp)
        if abs(stpc - stp) > abs(stpq - stp):
            stpf = stpc
        else:
            stpf = stpq
        brackt = True
    elif abs(dp) < abs(dx):
        # Third case: A lower function value, derivatives of the same sign,
        # and the magnitude of the derivative decreases.

        # The cubic step is computed only if the cubic tends to infinity
        # in the direction of the step or if the minimum of the cubic
        # is beyond stp. Otherwise the cubic step is defined to be the
        # secant step.
        theta = 3 * (fx - fp) / (stp - stx) + dx + dp
        s = max(abs(theta), abs(dx), abs(dp))

        # The case gamma = 0 only arises if the cubic does not tend
        # to infinity in the direction of the step.
        gamma = s * np.sqrt(max(0, (theta / s) ** 2 - (dx / s) * (dp / s)))
        if stp > stx:
            gamma = -gamma
        p = (gamma - dp) + theta
        q = (gamma + (dx - dp)) + gamma
        r = p / q
        if r < 0 and gamma != 0:
            stpc = stp + r * (stx - stp)
        elif stp > stx:
            stpc = stpmax
        else:
            stpc = stpmin
        stpq = stp + (dp / (dp - dx)) * (stx - stp)

        if brackt:
            # A minimizer has been bracketed. If the cubic step is
            # closer to stp than the secant step, the cubic step is
            # taken, otherwise the secant step is taken.
            if abs(stpc - stp) < abs(stpq - stp):
                stpf = stpc
            else:
                stpf = stpq

            if stp > stx:
                stpf = min(stp + 0.66 * (sty - stp), stpf)
            else:
                stpf = max(stp + 0.66 * (sty - stp), stpf)
        else:
            # A minimizer has not been bracketed. If the cubic step is
            # farther from stp than the secant step, the cubic step is
            # taken, otherwise the secant step is taken.
            if abs(stpc - stp) > abs(stpq - stp):
                stpf = stpc
            else:
                stpf = stpq
            stpf = np.clip(stpf, stpmin, stpmax)

    else:
        # Fourth case: A lower function value, derivatives of the same sign,
        # and the magnitude of the derivative does not decrease. If the
        # minimum is not bracketed, the step is either stpmin or stpmax,
        # otherwise the cubic step is taken.
        if brackt:
            theta = 3.0 * (fp - fy) / (sty - stp) + dy + dp
            s = max(abs(theta), abs(dy), abs(dp))
            gamma = s * np.sqrt((theta / s) ** 2 - (dy / s) * (dp / s))
            if stp > sty:
                gamma = -gamma
            p = (gamma - dp) + theta
            q = ((gamma - dp) + gamma) + dy
            r = p / q
            stpc = stp + r * (sty - stp)
            stpf = stpc
        elif stp > stx:
            stpf = stpmax
        else:
            stpf = stpmin

    # Update the interval which contains a minimizer.
    if fp > fx:
        sty = stp
        fy = fp
        dy = dp
    else:
        if sgnd < 0:
            sty = stx
            fy = fx
            dy = dx
        stx = stp
        fx = fp
        dx = dp

    # Compute the new step.
    stp = stpf

    return stx, fx, dx, sty, fy, dy, stp, brackt
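
# Illustrative sketch (not part of the original module): a DCSRCH line search
# on the made-up 1-D function phi(a) = (a - 2)**2, whose minimizer a = 2
# satisfies both the sufficient decrease and curvature conditions above.
if __name__ == "__main__":
    def phi(a):
        return (a - 2.0) ** 2

    def derphi(a):
        return 2.0 * (a - 2.0)

    search = DCSRCH(phi, derphi, ftol=1e-4, gtol=0.1, xtol=1e-10,
                    stpmin=1e-10, stpmax=10.0)
    stp, phi1, phi0, task = search(alpha1=1.0)
    print(stp, task)   # stp ~ 2.0, task starting with b'CONV'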

@@ -0,0 +1,835 @@
from collections import namedtuple

import numpy as np
import scipy.sparse as sps
from ._numdiff import approx_derivative, group_columns
from ._hessian_update_strategy import HessianUpdateStrategy
from scipy.sparse.linalg import LinearOperator
from scipy._lib._array_api import array_namespace, xp_copy
from scipy._lib import array_api_extra as xpx
from scipy._lib._util import _ScalarFunctionWrapper


FD_METHODS = ('2-point', '3-point', 'cs')


class _ScalarGradWrapper:
    """
    Wrapper class for gradient calculation
    """
    def __init__(
            self,
            grad,
            fun=None,
            args=None,
            finite_diff_options=None,
    ):
        self.fun = fun
        self.grad = grad
        self.args = [] if args is None else args
        self.finite_diff_options = finite_diff_options
        self.ngev = 0
        # number of function evaluations consumed by finite differences
        self.nfev = 0

    def __call__(self, x, f0=None, **kwds):
        # Send a copy because the user may overwrite it.
        # The user of this class might want `x` to remain unchanged.
        if callable(self.grad):
            g = np.atleast_1d(self.grad(np.copy(x), *self.args))
        elif self.grad in FD_METHODS:
            g, dct = approx_derivative(
                self.fun,
                x,
                f0=f0,
                **self.finite_diff_options,
            )
            self.nfev += dct['nfev']

        self.ngev += 1
        return g
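
# Illustrative sketch (not part of the original module): with a callable
# gradient the wrapper simply forwards the call and counts evaluations in
# `ngev`; the finite-difference branch additionally accumulates `nfev`.
if __name__ == "__main__":
    w = _ScalarGradWrapper(grad=lambda x: 2.0 * x)
    print(w(np.array([1.0, -3.0])), w.ngev)   # [ 2. -6.] 1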


class _ScalarHessWrapper:
    """
    Wrapper class for Hessian calculation via finite differences
    """
    def __init__(
            self,
            hess,
            x0=None,
            grad=None,
            args=None,
            finite_diff_options=None,
    ):
        self.hess = hess
        self.grad = grad
        self.args = [] if args is None else args
        self.finite_diff_options = finite_diff_options
        # keep track of any finite difference function evaluations for grad
        self.ngev = 0
        self.nhev = 0
        self.H = None
        self._hess_func = None

        if callable(hess):
            self.H = hess(np.copy(x0), *args)
            self.nhev += 1

            if sps.issparse(self.H):
                self._hess_func = self._sparse_callable
                self.H = sps.csr_array(self.H)
            elif isinstance(self.H, LinearOperator):
                self._hess_func = self._linearoperator_callable
            else:
                # dense
                self._hess_func = self._dense_callable
                self.H = np.atleast_2d(np.asarray(self.H))
        elif hess in FD_METHODS:
            self._hess_func = self._fd_hess

    def __call__(self, x, f0=None, **kwds):
        return self._hess_func(np.copy(x), f0=f0)

    def _fd_hess(self, x, f0=None, **kwds):
        self.H, dct = approx_derivative(
            self.grad, x, f0=f0, **self.finite_diff_options
        )
        self.ngev += dct["nfev"]
        return self.H

    def _sparse_callable(self, x, **kwds):
        self.nhev += 1
        self.H = sps.csr_array(self.hess(x, *self.args))
        return self.H

    def _dense_callable(self, x, **kwds):
        self.nhev += 1
        self.H = np.atleast_2d(
            np.asarray(self.hess(x, *self.args))
        )
        return self.H

    def _linearoperator_callable(self, x, **kwds):
        self.nhev += 1
        self.H = self.hess(x, *self.args)
        return self.H
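
# Illustrative sketch (not part of the original module): the wrapper probes a
# callable `hess` once at `x0` to decide between the sparse, LinearOperator,
# and dense code paths. Note that `args` must be an explicit tuple here: the
# initial probe unpacks the raw argument, so the `args is None` default only
# protects the later per-call path.
if __name__ == "__main__":
    w = _ScalarHessWrapper(hess=lambda x: 2.0 * np.eye(x.size),
                           x0=np.array([1.0, 2.0]), args=())
    print(w(np.array([1.0, 2.0])), w.nhev)   # 2x2 dense Hessian, nhev == 2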
|
||||
|
||||
|
||||
class ScalarFunction:
    """Scalar function and its derivatives.

    This class defines a scalar function F: R^n->R and methods for
    computing or approximating its first and second derivatives.

    Parameters
    ----------
    fun : callable
        evaluates the scalar function. Must be of the form ``fun(x, *args)``,
        where ``x`` is the argument in the form of a 1-D array and ``args`` is
        a tuple of any additional fixed parameters needed to completely specify
        the function. Should return a scalar.
    x0 : array-like
        Provides an initial set of variables for evaluating fun. Array of real
        elements of size (n,), where 'n' is the number of independent
        variables.
    args : tuple, optional
        Any additional fixed parameters needed to completely specify the scalar
        function.
    grad : {callable, '2-point', '3-point', 'cs'}
        Method for computing the gradient vector.
        If it is a callable, it should be a function that returns the gradient
        vector:

            ``grad(x, *args) -> array_like, shape (n,)``

        where ``x`` is an array with shape (n,) and ``args`` is a tuple with
        the fixed parameters.
        Alternatively, the keywords {'2-point', '3-point', 'cs'} can be used
        to select a finite difference scheme for numerical estimation of the
        gradient with a relative step size. These finite difference schemes
        obey any specified `bounds`.
    hess : {callable, '2-point', '3-point', 'cs', HessianUpdateStrategy}
        Method for computing the Hessian matrix. If it is callable, it should
        return the Hessian matrix:

            ``hess(x, *args) -> {LinearOperator, spmatrix, array}, (n, n)``

        where x is a (n,) ndarray and `args` is a tuple with the fixed
        parameters. Alternatively, the keywords {'2-point', '3-point', 'cs'}
        select a finite difference scheme for numerical estimation. Or, objects
        implementing the `HessianUpdateStrategy` interface can be used to
        approximate the Hessian.
        Whenever the gradient is estimated via finite-differences, the Hessian
        cannot be estimated with options {'2-point', '3-point', 'cs'} and needs
        to be estimated using one of the quasi-Newton strategies.
    finite_diff_rel_step : None or array_like
        Relative step size to use. The absolute step size is computed as
        ``h = finite_diff_rel_step * sign(x0) * max(1, abs(x0))``, possibly
        adjusted to fit into the bounds. For ``method='3-point'`` the sign
        of `h` is ignored. If None then finite_diff_rel_step is selected
        automatically.
    finite_diff_bounds : tuple of array_like
        Lower and upper bounds on independent variables. Defaults to no bounds,
        (-np.inf, np.inf). Each bound must match the size of `x0` or be a
        scalar; in the latter case the bound will be the same for all
        variables. Use it to limit the range of function evaluation.
    epsilon : None or array_like, optional
        Absolute step size to use, possibly adjusted to fit into the bounds.
        For ``method='3-point'`` the sign of `epsilon` is ignored. By default
        relative steps are used; only if ``epsilon is not None`` are absolute
        steps used.
    workers : map-like callable, optional
        A map-like callable, such as `multiprocessing.Pool.map`, for evaluating
        any numerical differentiation in parallel.
        This evaluation is carried out as ``workers(fun, iterable)``, or
        ``workers(grad, iterable)``, depending on what is being numerically
        differentiated.
        Alternatively, if `workers` is an int the task is subdivided into
        `workers` sections and the function evaluated in parallel
        (uses `multiprocessing.Pool <multiprocessing>`).
        Supply -1 to use all available CPU cores.
        It is recommended that a map-like be used instead of an int, as
        repeated calls to `approx_derivative` will incur large overhead from
        setting up new processes.

        .. versionadded:: 1.16.0

    Notes
    -----
    This class implements a memoization logic. There are methods `fun`,
    `grad`, `hess` and corresponding attributes `f`, `g` and `H`. The
    following things should be considered:

    1. Use only public methods `fun`, `grad` and `hess`.
    2. After one of the methods is called, the corresponding attribute
       will be set. However, a subsequent call with a different argument
       of *any* of the methods may overwrite the attribute.
    """
    def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step=None,
                 finite_diff_bounds=(-np.inf, np.inf), epsilon=None,
                 workers=None):

        if not callable(grad) and grad not in FD_METHODS:
            raise ValueError(
                f"`grad` must be either callable or one of {FD_METHODS}."
            )

        if not (callable(hess) or hess in FD_METHODS
                or isinstance(hess, HessianUpdateStrategy)):
            raise ValueError(
                f"`hess` must be either callable, HessianUpdateStrategy"
                f" or one of {FD_METHODS}."
            )

        if grad in FD_METHODS and hess in FD_METHODS:
            raise ValueError("Whenever the gradient is estimated via "
                             "finite-differences, we require the Hessian "
                             "to be estimated using one of the "
                             "quasi-Newton strategies.")

        self.xp = xp = array_namespace(x0)
        _x = xpx.atleast_nd(xp.asarray(x0), ndim=1, xp=xp)
        _dtype = xp.float64
        if xp.isdtype(_x.dtype, "real floating"):
            _dtype = _x.dtype

        # original arguments
        self._wrapped_fun = _ScalarFunctionWrapper(fun, args)
        self._orig_fun = fun
        self._orig_grad = grad
        self._orig_hess = hess
        self._args = args

        # promotes to floating
        self.x = xp.astype(_x, _dtype)
        self.x_dtype = _dtype
        self.n = self.x.size
        self.f_updated = False
        self.g_updated = False
        self.H_updated = False

        self._lowest_x = None
        self._lowest_f = np.inf

        # normalize workers
        workers = workers or map

        finite_diff_options = {}
        if grad in FD_METHODS:
            finite_diff_options["method"] = grad
            finite_diff_options["rel_step"] = finite_diff_rel_step
            finite_diff_options["abs_step"] = epsilon
            finite_diff_options["bounds"] = finite_diff_bounds
            finite_diff_options["workers"] = workers
            finite_diff_options["full_output"] = True
        if hess in FD_METHODS:
            finite_diff_options["method"] = hess
            finite_diff_options["rel_step"] = finite_diff_rel_step
            finite_diff_options["abs_step"] = epsilon
            finite_diff_options["as_linear_operator"] = True
            finite_diff_options["workers"] = workers
            finite_diff_options["full_output"] = True

        # Initial function evaluation
        self._nfev = 0
        self._update_fun()

        # Initial gradient evaluation
        self._wrapped_grad = _ScalarGradWrapper(
            grad,
            fun=self._wrapped_fun,
            args=args,
            finite_diff_options=finite_diff_options,
        )
        self._update_grad()

        # Hessian evaluation
        if isinstance(hess, HessianUpdateStrategy):
            self.H = hess
            self.H.initialize(self.n, 'hess')
            self.H_updated = True
            self.x_prev = None
            self.g_prev = None
            _FakeCounter = namedtuple('_FakeCounter', ['ngev', 'nhev'])
            self._wrapped_hess = _FakeCounter(ngev=0, nhev=0)
        else:
            if callable(hess):
                self._wrapped_hess = _ScalarHessWrapper(
                    hess,
                    x0=x0,
                    args=args,
                    finite_diff_options=finite_diff_options
                )
                self.H = self._wrapped_hess.H
                self.H_updated = True
            elif hess in FD_METHODS:
                self._wrapped_hess = _ScalarHessWrapper(
                    hess,
                    x0=x0,
                    args=args,
                    grad=self._wrapped_grad,
                    finite_diff_options=finite_diff_options
                )
                self._update_grad()
                self.H = self._wrapped_hess(self.x, f0=self.g)
                self.H_updated = True

    @property
    def nfev(self):
        return self._nfev + self._wrapped_grad.nfev

    @property
    def ngev(self):
        return self._wrapped_grad.ngev  # + self._wrapped_hess.ngev

    @property
    def nhev(self):
        return self._wrapped_hess.nhev

    def _update_x(self, x):
        if isinstance(self._orig_hess, HessianUpdateStrategy):
            self._update_grad()
            self.x_prev = self.x
            self.g_prev = self.g
            # ensure that self.x is a copy of x. Don't store a reference
            # otherwise the memoization doesn't work properly.

            _x = xpx.atleast_nd(self.xp.asarray(x), ndim=1, xp=self.xp)
            self.x = self.xp.astype(_x, self.x_dtype)
            self.f_updated = False
            self.g_updated = False
            self.H_updated = False
            self._update_hess()
        else:
            # ensure that self.x is a copy of x. Don't store a reference
            # otherwise the memoization doesn't work properly.
            _x = xpx.atleast_nd(self.xp.asarray(x), ndim=1, xp=self.xp)
            self.x = self.xp.astype(_x, self.x_dtype)
            self.f_updated = False
            self.g_updated = False
            self.H_updated = False

    def _update_fun(self):
        if not self.f_updated:
            fx = self._wrapped_fun(self.x)
            self._nfev += 1
            if fx < self._lowest_f:
                self._lowest_x = self.x
                self._lowest_f = fx

            self.f = fx
            self.f_updated = True

    def _update_grad(self):
        if not self.g_updated:
            if self._orig_grad in FD_METHODS:
                self._update_fun()
            self.g = self._wrapped_grad(self.x, f0=self.f)
            self.g_updated = True

    def _update_hess(self):
        if not self.H_updated:
            if self._orig_hess in FD_METHODS:
                self._update_grad()
                self.H = self._wrapped_hess(self.x, f0=self.g)
            elif isinstance(self._orig_hess, HessianUpdateStrategy):
                self._update_grad()
                self.H.update(self.x - self.x_prev, self.g - self.g_prev)
            else:  # should be callable(hess)
                self.H = self._wrapped_hess(self.x)

            self.H_updated = True

    def fun(self, x):
        if not np.array_equal(x, self.x):
            self._update_x(x)
        self._update_fun()
        return self.f

    def grad(self, x):
        if not np.array_equal(x, self.x):
            self._update_x(x)
        self._update_grad()
        return self.g

    def hess(self, x):
        if not np.array_equal(x, self.x):
            self._update_x(x)
        self._update_hess()
        return self.H

    def fun_and_grad(self, x):
        if not np.array_equal(x, self.x):
            self._update_x(x)
        self._update_fun()
        self._update_grad()
        return self.f, self.g

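
# A minimal usage sketch of the memoization above, assuming `BFGS` from
# scipy.optimize as the quasi-Newton strategy (a finite-difference gradient
# may not be combined with a finite-difference Hessian, per the check in
# __init__):
#
#     from scipy.optimize import BFGS
#     sf = ScalarFunction(lambda x: x @ x, [1.0, 2.0], (), '2-point', BFGS())
#     sf.fun(np.array([1.0, 2.0]))   # same x: served from cache
#     sf.grad(np.array([3.0, 4.0]))  # new x invalidates f, g and H first
#
# For parallel finite differences, a map-like such as
# ``multiprocessing.Pool(2).map`` can be passed as ``workers``; a map-like
# is preferred over an int so the process pool is set up only once.
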
class _VectorFunWrapper:
    def __init__(self, fun):
        self.fun = fun
        self.nfev = 0

    def __call__(self, x):
        self.nfev += 1
        return np.atleast_1d(self.fun(x))


class _VectorJacWrapper:
    """
    Wrapper class for Jacobian calculation
    """
    def __init__(
            self,
            jac,
            fun=None,
            finite_diff_options=None,
            sparse_jacobian=None
    ):
        self.fun = fun
        self.jac = jac
        self.finite_diff_options = finite_diff_options
        self.sparse_jacobian = sparse_jacobian

        self.njev = 0
        # number of function evaluations consumed by finite differences
        self.nfev = 0

    def __call__(self, x, f0=None, **kwds):
        # Send a copy because the user may overwrite it.
        # The user of this class might want `x` to remain unchanged.
        if callable(self.jac):
            J = self.jac(x)
            self.njev += 1
        elif self.jac in FD_METHODS:
            J, dct = approx_derivative(
                self.fun,
                x,
                f0=f0,
                **self.finite_diff_options,
            )
            self.nfev += dct['nfev']

        if self.sparse_jacobian:
            return sps.csr_array(J)
        elif sps.issparse(J):
            return J.toarray()
        elif isinstance(J, LinearOperator):
            return J
        else:
            return np.atleast_2d(J)


class _VectorHessWrapper:
    """
    Wrapper class for Hessian calculation
    """
    def __init__(
            self,
            hess,
            jac=None,
            finite_diff_options=None,
    ):
        self.jac = jac
        self.hess = hess
        self.finite_diff_options = finite_diff_options
        self.nhev = 0
        # number of jac evaluations consumed by finite differences
        self.njev = 0

    def __call__(self, x, v, J0=None, **kwds):
        # Send a copy because the user may overwrite it.
        # The user of this class might want `x` to remain unchanged.
        if callable(self.hess):
            self.nhev += 1
            return self._callable_hess(x, v)
        elif self.hess in FD_METHODS:
            return self._fd_hess(x, v, J0=J0)

    def _fd_hess(self, x, v, J0=None):
        if J0 is None:
            J0 = self.jac(x)
            self.njev += 1

        # H will be a LinearOperator
        H = approx_derivative(self.jac_dot_v, x,
                              f0=J0.T.dot(v),
                              args=(v,),
                              **self.finite_diff_options)
        return H

    def jac_dot_v(self, x, v):
        self.njev += 1
        return self.jac(x).T.dot(v)

    def _callable_hess(self, x, v):
        H = self.hess(x, v)

        if sps.issparse(H):
            return sps.csr_array(H)
        elif isinstance(H, LinearOperator):
            return H
        else:
            return np.atleast_2d(np.asarray(H))

class VectorFunction:
    """Vector function and its derivatives.

    This class defines a vector function F: R^n->R^m and methods for
    computing or approximating its first and second derivatives.

    Notes
    -----
    This class implements a memoization logic. There are methods `fun`,
    `jac`, `hess` and corresponding attributes `f`, `J` and `H`. The
    following things should be considered:

    1. Use only public methods `fun`, `jac` and `hess`.
    2. After one of the methods is called, the corresponding attribute
       will be set. However, a subsequent call with a different argument
       of *any* of the methods may overwrite the attribute.
    """
    def __init__(self, fun, x0, jac, hess,
                 finite_diff_rel_step=None, finite_diff_jac_sparsity=None,
                 finite_diff_bounds=(-np.inf, np.inf), sparse_jacobian=None,
                 workers=None):
        if not callable(jac) and jac not in FD_METHODS:
            raise ValueError(f"`jac` must be either callable or one of "
                             f"{FD_METHODS}.")

        if not (callable(hess) or hess in FD_METHODS
                or isinstance(hess, HessianUpdateStrategy)):
            raise ValueError("`hess` must be either callable, "
                             f"HessianUpdateStrategy or one of {FD_METHODS}.")

        if jac in FD_METHODS and hess in FD_METHODS:
            raise ValueError("Whenever the Jacobian is estimated via "
                             "finite-differences, we require the Hessian to "
                             "be estimated using one of the quasi-Newton "
                             "strategies.")

        self.xp = xp = array_namespace(x0)
        _x = xpx.atleast_nd(xp.asarray(x0), ndim=1, xp=xp)
        _dtype = xp.float64
        if xp.isdtype(_x.dtype, "real floating"):
            _dtype = _x.dtype

        # store original functions
        self._orig_fun = fun
        self._orig_jac = jac
        self._orig_hess = hess

        # promotes to floating, ensures that it's a copy
        self.x = xp.astype(_x, _dtype)
        self.x_dtype = _dtype

        self.n = self.x.size
        self._nfev = 0
        self._njev = 0
        self._nhev = 0
        self.f_updated = False
        self.J_updated = False
        self.H_updated = False

        # normalize workers
        workers = workers or map

        finite_diff_options = {}
        if jac in FD_METHODS:
            finite_diff_options["method"] = jac
            finite_diff_options["rel_step"] = finite_diff_rel_step
            if finite_diff_jac_sparsity is not None:
                sparsity_groups = group_columns(finite_diff_jac_sparsity)
                finite_diff_options["sparsity"] = (finite_diff_jac_sparsity,
                                                   sparsity_groups)
            finite_diff_options["bounds"] = finite_diff_bounds
            finite_diff_options["workers"] = workers
            finite_diff_options["full_output"] = True
            self.x_diff = np.copy(self.x)
        if hess in FD_METHODS:
            finite_diff_options["method"] = hess
            finite_diff_options["rel_step"] = finite_diff_rel_step
            finite_diff_options["as_linear_operator"] = True
            # workers is not useful for evaluation of the LinearOperator
            # produced by approx_derivative. Only two/three function
            # evaluations are used, and the LinearOperator may persist
            # outside the scope that workers is valid in.
            self.x_diff = np.copy(self.x)
        if jac in FD_METHODS and hess in FD_METHODS:
            raise ValueError("Whenever the Jacobian is estimated via "
                             "finite-differences, we require the Hessian to "
                             "be estimated using one of the quasi-Newton "
                             "strategies.")

        self.fun_wrapped = _VectorFunWrapper(fun)
        self._update_fun()

        self.v = np.zeros_like(self.f)
        self.m = self.v.size

        # Initial Jacobian Evaluation
        if callable(jac):
            self.J = jac(xp_copy(self.x))
            self.J_updated = True
            self._njev += 1
        elif jac in FD_METHODS:
            self.J, dct = approx_derivative(
                self.fun_wrapped, self.x, f0=self.f, **finite_diff_options
            )
            self.J_updated = True
            self._nfev += dct['nfev']

        self.sparse_jacobian = False
        if (sparse_jacobian or
                sparse_jacobian is None and sps.issparse(self.J)):
            # something truthy was specified for sparse_jacobian,
            # or it turns out that the Jacobian was sparse.
            self.J = sps.csr_array(self.J)
            self.sparse_jacobian = True
        elif sps.issparse(self.J):
            self.J = self.J.toarray()
        elif isinstance(self.J, LinearOperator):
            pass
        else:
            self.J = np.atleast_2d(self.J)

        self.jac_wrapped = _VectorJacWrapper(
            jac,
            fun=self.fun_wrapped,
            finite_diff_options=finite_diff_options,
            sparse_jacobian=self.sparse_jacobian
        )

        self.hess_wrapped = _VectorHessWrapper(
            hess, jac=self.jac_wrapped, finite_diff_options=finite_diff_options
        )

        # Define Hessian
        if callable(hess) or hess in FD_METHODS:
            self.H = self.hess_wrapped(xp_copy(self.x), self.v, J0=self.J)
            self.H_updated = True
            if callable(hess):
                self._nhev += 1
        elif isinstance(hess, HessianUpdateStrategy):
            self.H = hess
            self.H.initialize(self.n, 'hess')
            self.H_updated = True
            self.x_prev = None
            self.J_prev = None

    @property
    def nfev(self):
        return self._nfev + self.jac_wrapped.nfev

    @property
    def njev(self):
        return self._njev + self.hess_wrapped.njev

    @property
    def nhev(self):
        return self._nhev

    def _update_v(self, v):
        if not np.array_equal(v, self.v):
            self.v = v
            self.H_updated = False

    def _update_x(self, x):
        if not np.array_equal(x, self.x):
            if isinstance(self._orig_hess, HessianUpdateStrategy):
                self._update_jac()
                self.x_prev = self.x
                self.J_prev = self.J
                _x = xpx.atleast_nd(self.xp.asarray(x), ndim=1, xp=self.xp)
                self.x = self.xp.astype(_x, self.x_dtype)
                self.f_updated = False
                self.J_updated = False
                self.H_updated = False
                self._update_hess()
            else:
                _x = xpx.atleast_nd(self.xp.asarray(x), ndim=1, xp=self.xp)
                self.x = self.xp.astype(_x, self.x_dtype)
                self.f_updated = False
                self.J_updated = False
                self.H_updated = False

    def _update_fun(self):
        if not self.f_updated:
            self.f = self.fun_wrapped(xp_copy(self.x))
            self._nfev += 1
            self.f_updated = True

    def _update_jac(self):
        if not self.J_updated:
            if self._orig_jac in FD_METHODS:
                # need to update fun to get f0
                self._update_fun()
            else:
                self._njev += 1

            self.J = self.jac_wrapped(xp_copy(self.x), f0=self.f)
            self.J_updated = True

    def _update_hess(self):
        if not self.H_updated:
            if callable(self._orig_hess):
                self.H = self.hess_wrapped(xp_copy(self.x), self.v)
                self._nhev += 1
            elif self._orig_hess in FD_METHODS:
                self._update_jac()
                self.H = self.hess_wrapped(xp_copy(self.x), self.v, J0=self.J)
            elif isinstance(self._orig_hess, HessianUpdateStrategy):
                self._update_jac()
                # When v is updated before x was updated, then x_prev and
                # J_prev are None and we need this check.
                if self.x_prev is not None and self.J_prev is not None:
                    delta_x = self.x - self.x_prev
                    delta_g = self.J.T.dot(self.v) - self.J_prev.T.dot(self.v)
                    self.H.update(delta_x, delta_g)

            self.H_updated = True

    def fun(self, x):
        self._update_x(x)
        self._update_fun()
        # returns a copy so that downstream can't overwrite the
        # internal attribute
        return xp_copy(self.f)

    def jac(self, x):
        self._update_x(x)
        self._update_jac()
        if hasattr(self.J, "astype"):
            # returns a copy so that downstream can't overwrite the
            # internal attribute. But one can't copy a LinearOperator
            return self.J.astype(self.J.dtype)
        return self.J

    def hess(self, x, v):
        # v should be updated before x.
        self._update_v(v)
        self._update_x(x)
        self._update_hess()
        if hasattr(self.H, "astype"):
            # returns a copy so that downstream can't overwrite the
            # internal attribute. But one can't copy non-arrays
            return self.H.astype(self.H.dtype)
        return self.H

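
# A minimal sketch of the v-before-x convention in `hess` above, assuming
# a two-component constraint with an exact Jacobian (the callables shown
# are illustrative):
#
#     from scipy.optimize import BFGS
#     vf = VectorFunction(lambda x: [x[0]**2, x[0] * x[1]],
#                         x0=[1.0, 2.0],
#                         jac=lambda x: np.array([[2 * x[0], 0.0],
#                                                 [x[1], x[0]]]),
#                         hess=BFGS())
#     H = vf.hess(np.array([1.5, 2.0]), v=np.array([1.0, 0.5]))
#
# With a HessianUpdateStrategy, H approximates the curvature of v . F(x)
# from successive (delta_x, J.T v) differences rather than exact second
# derivatives, which is why `v` must be set before `x` is moved.
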
class LinearVectorFunction:
    """Linear vector function and its derivatives.

    Defines a linear function F = A x, where x is an N-D vector and
    A is an m-by-n matrix. The Jacobian is constant and equal to A. The
    Hessian is identically zero and is returned as a csr matrix.
    """
    def __init__(self, A, x0, sparse_jacobian):
        if sparse_jacobian or sparse_jacobian is None and sps.issparse(A):
            self.J = sps.csr_array(A)
            self.sparse_jacobian = True
        elif sps.issparse(A):
            self.J = A.toarray()
            self.sparse_jacobian = False
        else:
            # np.asarray makes sure A is ndarray and not matrix
            self.J = np.atleast_2d(np.asarray(A))
            self.sparse_jacobian = False

        self.m, self.n = self.J.shape

        self.xp = xp = array_namespace(x0)
        _x = xpx.atleast_nd(xp.asarray(x0), ndim=1, xp=xp)
        _dtype = xp.float64
        if xp.isdtype(_x.dtype, "real floating"):
            _dtype = _x.dtype

        # promotes to floating
        self.x = xp.astype(_x, _dtype)
        self.x_dtype = _dtype

        self.f = self.J.dot(self.x)
        self.f_updated = True

        self.v = np.zeros(self.m, dtype=float)
        self.H = sps.csr_array((self.n, self.n))

    def _update_x(self, x):
        if not np.array_equal(x, self.x):
            _x = xpx.atleast_nd(self.xp.asarray(x), ndim=1, xp=self.xp)
            self.x = self.xp.astype(_x, self.x_dtype)
            self.f_updated = False

    def fun(self, x):
        self._update_x(x)
        if not self.f_updated:
            self.f = self.J.dot(x)
            self.f_updated = True
        return self.f

    def jac(self, x):
        self._update_x(x)
        return self.J

    def hess(self, x, v):
        self._update_x(x)
        self.v = v
        return self.H


class IdentityVectorFunction(LinearVectorFunction):
    """Identity vector function and its derivatives.

    The Jacobian is the identity matrix, returned as a dense array when
    `sparse_jacobian=False` and as a csr matrix otherwise. The Hessian is
    identically zero and is returned as a csr matrix.
    """
    def __init__(self, x0, sparse_jacobian):
        n = len(x0)
        if sparse_jacobian or sparse_jacobian is None:
            A = sps.eye_array(n, format='csr')
            sparse_jacobian = True
        else:
            A = np.eye(n)
            sparse_jacobian = False
        super().__init__(A, x0, sparse_jacobian)
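
# A minimal sketch of the constant-Jacobian behaviour above:
#
#     lvf = LinearVectorFunction(A=np.array([[1.0, 2.0], [3.0, 4.0]]),
#                                x0=[0.0, 0.0], sparse_jacobian=False)
#     lvf.fun(np.array([1.0, 1.0]))   # A @ x -> array([3., 7.])
#     lvf.jac(np.array([5.0, 5.0]))   # always A, independent of x
#     lvf.hess(np.array([1.0, 1.0]), v=np.ones(2)).toarray()  # zeros (2, 2)
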
280 venv/lib/python3.13/site-packages/scipy/optimize/_direct_py.py Normal file
@@ -0,0 +1,280 @@
from typing import (  # noqa: UP035
    Any, Callable, Iterable
)

import numpy as np
from scipy.optimize import OptimizeResult
from ._constraints import old_bound_to_new, Bounds
from ._direct import direct as _direct  # type: ignore

__all__ = ['direct']

ERROR_MESSAGES = (
    "Number of function evaluations done is larger than maxfun={}",
    "Number of iterations is larger than maxiter={}",
    "u[i] < l[i] for some i",
    "maxfun is too large",
    "Initialization failed",
    "There was an error in the creation of the sample points",
    "An error occurred while the function was sampled",
    "Maximum number of levels has been reached.",
    "Forced stop",
    "Invalid arguments",
    "Out of memory",
)

SUCCESS_MESSAGES = (
    ("The best function value found is within a relative error={} "
     "of the (known) global optimum f_min"),
    ("The volume of the hyperrectangle containing the lowest function value "
     "found is below vol_tol={}"),
    ("The side length measure of the hyperrectangle containing the lowest "
     "function value found is below len_tol={}"),
)


def direct(
    func: Callable[
        [np.ndarray[tuple[int], np.dtype[np.float64]]],
        float | np.floating[Any] | np.integer[Any] | np.bool_,
    ],
    bounds: Iterable | Bounds,
    *,
    args: tuple = (),
    eps: float = 1e-4,
    maxfun: int | None = None,
    maxiter: int = 1000,
    locally_biased: bool = True,
    f_min: float = -np.inf,
    f_min_rtol: float = 1e-4,
    vol_tol: float = 1e-16,
    len_tol: float = 1e-6,
    callback: Callable[
        [np.ndarray[tuple[int], np.dtype[np.float64]]],
        object,
    ] | None = None,
) -> OptimizeResult:
    """
    Finds the global minimum of a function using the
    DIRECT algorithm.

    Parameters
    ----------
    func : callable
        The objective function to be minimized.
        ``func(x, *args) -> float``
        where ``x`` is a 1-D array with shape (n,) and ``args`` is a tuple of
        the fixed parameters needed to completely specify the function.
    bounds : sequence or `Bounds`
        Bounds for variables. There are two ways to specify the bounds:

        1. Instance of `Bounds` class.
        2. ``(min, max)`` pairs for each element in ``x``.

    args : tuple, optional
        Any additional fixed parameters needed to
        completely specify the objective function.
    eps : float, optional
        Minimal required difference of the objective function values
        between the current best hyperrectangle and the next potentially
        optimal hyperrectangle to be divided. In consequence, `eps` serves as a
        tradeoff between local and global search: the smaller, the more local
        the search becomes. Default is 1e-4.
    maxfun : int or None, optional
        Approximate upper bound on objective function evaluations.
        If `None`, will be automatically set to ``1000 * N`` where ``N``
        represents the number of dimensions. Will be capped if necessary to
        limit DIRECT's RAM usage to approx. 1 GiB. This will only occur for
        very high dimensional problems and excessive `maxfun`. Default is
        `None`.
    maxiter : int, optional
        Maximum number of iterations. Default is 1000.
    locally_biased : bool, optional
        If `True` (default), use the locally biased variant of the
        algorithm known as DIRECT_L. If `False`, use the original unbiased
        DIRECT algorithm. For hard problems with many local minima,
        `False` is recommended.
    f_min : float, optional
        Function value of the global optimum. Set this value only if the
        global optimum is known. Default is ``-np.inf``, so that this
        termination criterion is deactivated.
    f_min_rtol : float, optional
        Terminate the optimization once the relative error between the
        current best minimum `f` and the supplied global minimum `f_min`
        is smaller than `f_min_rtol`. This parameter is only used if
        `f_min` is also set. Must lie between 0 and 1. Default is 1e-4.
    vol_tol : float, optional
        Terminate the optimization once the volume of the hyperrectangle
        containing the lowest function value is smaller than `vol_tol`
        of the complete search space. Must lie between 0 and 1.
        Default is 1e-16.
    len_tol : float, optional
        If ``locally_biased=True``, terminate the optimization once half of
        the normalized maximal side length of the hyperrectangle containing
        the lowest function value is smaller than `len_tol`.
        If ``locally_biased=False``, terminate the optimization once half of
        the normalized diagonal of the hyperrectangle containing the lowest
        function value is smaller than `len_tol`. Must lie between 0 and 1.
        Default is 1e-6.
    callback : callable, optional
        A callback function with signature ``callback(xk)`` where ``xk``
        represents the best solution found so far.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as an ``OptimizeResult`` object.
        Important attributes are: ``x`` the solution array, ``success`` a
        Boolean flag indicating if the optimizer exited successfully and
        ``message`` which describes the cause of the termination. See
        `OptimizeResult` for a description of other attributes.

    Notes
    -----
    DIviding RECTangles (DIRECT) is a deterministic global
    optimization algorithm capable of minimizing a black box function with
    its variables subject to lower and upper bound constraints by sampling
    potential solutions in the search space [1]_. The algorithm starts by
    normalising the search space to an n-dimensional unit hypercube.
    It samples the function at the center of this hypercube and at 2n
    (n is the number of variables) more points, 2 in each coordinate
    direction. Using these function values, DIRECT then divides the
    domain into hyperrectangles, each having exactly one of the sampling
    points as its center. In each iteration, DIRECT chooses, using the `eps`
    parameter which defaults to 1e-4, some of the existing hyperrectangles
    to be further divided. This division process continues until either the
    maximum number of iterations or maximum function evaluations allowed
    are exceeded, or the hyperrectangle containing the minimal value found
    so far becomes small enough. If `f_min` is specified, the optimization
    will stop once this function value is reached within a relative tolerance.
    The locally biased variant of DIRECT (originally called DIRECT_L) [2]_ is
    used by default. It makes the search more locally biased and more
    efficient for cases with only a few local minima.

    A note about termination criteria: `vol_tol` refers to the volume of the
    hyperrectangle containing the lowest function value found so far. This
    volume decreases exponentially with increasing dimensionality of the
    problem. Therefore `vol_tol` should be decreased to avoid premature
    termination of the algorithm for higher dimensions. This does not hold
    for `len_tol`: it refers either to half of the maximal side length
    (for ``locally_biased=True``) or half of the diagonal of the
    hyperrectangle (for ``locally_biased=False``).

    This code is based on the DIRECT 2.0.4 Fortran code by Gablonsky et al. at
    https://ctk.math.ncsu.edu/SOFTWARE/DIRECTv204.tar.gz .
    This original version was initially converted via f2c and then cleaned up
    and reorganized by Steven G. Johnson, August 2007, for the NLopt project.
    The `direct` function wraps the C implementation.

    .. versionadded:: 1.9.0

    References
    ----------
    .. [1] Jones, D.R., Perttunen, C.D. & Stuckman, B.E. Lipschitzian
        optimization without the Lipschitz constant. J Optim Theory Appl
        79, 157-181 (1993).
    .. [2] Gablonsky, J., Kelley, C. A Locally-Biased form of the DIRECT
        Algorithm. Journal of Global Optimization 21, 27-37 (2001).

    Examples
    --------
    The following example is a 2-D problem with four local minima: minimizing
    the Styblinski-Tang function
    (https://en.wikipedia.org/wiki/Test_functions_for_optimization).

    >>> from scipy.optimize import direct, Bounds
    >>> def styblinski_tang(pos):
    ...     x, y = pos
    ...     return 0.5 * (x**4 - 16*x**2 + 5*x + y**4 - 16*y**2 + 5*y)
    >>> bounds = Bounds([-4., -4.], [4., 4.])
    >>> result = direct(styblinski_tang, bounds)
    >>> result.x, result.fun, result.nfev
    array([-2.90321597, -2.90321597]), -78.3323279095383, 2011

    The correct global minimum was found but with a huge number of function
    evaluations (2011). Loosening the termination tolerances `vol_tol` and
    `len_tol` can be used to stop DIRECT earlier.

    >>> result = direct(styblinski_tang, bounds, len_tol=1e-3)
    >>> result.x, result.fun, result.nfev
    array([-2.9044353, -2.9044353]), -78.33230330754142, 207

    """
    # convert bounds to new Bounds class if necessary
    if not isinstance(bounds, Bounds):
        if isinstance(bounds, (list, tuple)):
            lb, ub = old_bound_to_new(bounds)
            bounds = Bounds(lb, ub)
        else:
            message = ("bounds must be a sequence or "
                       "instance of Bounds class")
            raise ValueError(message)

    lb = np.ascontiguousarray(bounds.lb, dtype=np.float64)
    ub = np.ascontiguousarray(bounds.ub, dtype=np.float64)

    # validate bounds
    # check that lower bounds are smaller than upper bounds
    if not np.all(lb < ub):
        raise ValueError('Bounds are not consistent min < max')
    # check for infs
    if (np.any(np.isinf(lb)) or np.any(np.isinf(ub))):
        raise ValueError("Bounds must not be inf.")

    # validate tolerances
    if (vol_tol < 0 or vol_tol > 1):
        raise ValueError("vol_tol must be between 0 and 1.")
    if (len_tol < 0 or len_tol > 1):
        raise ValueError("len_tol must be between 0 and 1.")
    if (f_min_rtol < 0 or f_min_rtol > 1):
        raise ValueError("f_min_rtol must be between 0 and 1.")

    # validate maxfun and maxiter
    if maxfun is None:
        maxfun = 1000 * lb.shape[0]
    if not isinstance(maxfun, int):
        raise ValueError("maxfun must be of type int.")
    if maxfun < 0:
        raise ValueError("maxfun must be > 0.")
    if not isinstance(maxiter, int):
        raise ValueError("maxiter must be of type int.")
    if maxiter < 0:
        raise ValueError("maxiter must be > 0.")

    # validate boolean parameters
    if not isinstance(locally_biased, bool):
        raise ValueError("locally_biased must be True or False.")

    def _func_wrap(x, args=None):
        x = np.asarray(x)
        if args is None:
            f = func(x)
        else:
            f = func(x, *args)
        # always return a float
        return np.asarray(f).item()

    # TODO: fix disp argument
    x, fun, ret_code, nfev, nit = _direct(
        _func_wrap,
        np.asarray(lb), np.asarray(ub),
        args,
        False, eps, maxfun, maxiter,
        locally_biased,
        f_min, f_min_rtol,
        vol_tol, len_tol, callback
    )

    format_val = (maxfun, maxiter, f_min_rtol, vol_tol, len_tol)
    if ret_code > 2:
        message = SUCCESS_MESSAGES[ret_code - 3].format(
            format_val[ret_code - 1])
    elif 0 < ret_code <= 2:
        message = ERROR_MESSAGES[ret_code - 1].format(format_val[ret_code - 1])
    elif 0 > ret_code > -100:
        message = ERROR_MESSAGES[abs(ret_code) + 1]
    else:
        message = ERROR_MESSAGES[ret_code + 99]

    return OptimizeResult(x=np.asarray(x), fun=fun, status=ret_code,
                          success=ret_code > 2, message=message,
                          nfev=nfev, nit=nit)
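
# A minimal sketch of the `f_min` termination path described in the notes,
# reusing the Styblinski-Tang example from the docstring; -78.332 is an
# approximation of its known 2-D optimum:
#
#     result = direct(styblinski_tang, bounds, f_min=-78.332,
#                     f_min_rtol=1e-3)
#
# DIRECT then stops as soon as the best value found lies within the
# relative tolerance of the supplied optimum, typically long before the
# vol_tol or len_tol criteria would trigger.
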
@@ -0,0 +1,732 @@
# Dual Annealing implementation.
# Copyright (c) 2018 Sylvain Gubian <sylvain.gubian@pmi.com>,
# Yang Xiang <yang.xiang@pmi.com>
# Author: Sylvain Gubian, Yang Xiang, PMP S.A.

"""
A Dual Annealing global optimization algorithm
"""

import numpy as np
from scipy.optimize import OptimizeResult
from scipy.optimize import minimize, Bounds
from scipy.special import gammaln
from scipy._lib._util import check_random_state, _transition_to_rng
from scipy.optimize._constraints import new_bounds_to_old

__all__ = ['dual_annealing']


class VisitingDistribution:
    """
    Class used to generate new coordinates based on the distorted
    Cauchy-Lorentz distribution. Depending on the steps within the strategy
    chain, the class implements the strategy for generating new location
    changes.

    Parameters
    ----------
    lb : array_like
        A 1-D NumPy ndarray containing lower bounds of the generated
        components. Neither NaN nor inf is allowed.
    ub : array_like
        A 1-D NumPy ndarray containing upper bounds for the generated
        components. Neither NaN nor inf is allowed.
    visiting_param : float
        Parameter for visiting distribution. Default value is 2.62.
        Higher values give the visiting distribution a heavier tail; this
        makes the algorithm jump to a more distant region.
        The value range is (1, 3]. Its value is fixed for the life of the
        object.
    rng_gen : {`~numpy.random.Generator`}
        A `~numpy.random.Generator` object for generating new locations
        (can be a `~numpy.random.RandomState` object until the SPEC007
        transition is fully complete).

    """
    TAIL_LIMIT = 1.e8
    MIN_VISIT_BOUND = 1.e-10

    def __init__(self, lb, ub, visiting_param, rng_gen):
        # if you wish to make _visiting_param adjustable during the life of
        # the object then _factor2, _factor3, _factor5, _d1, _factor6 will
        # have to be dynamically calculated in `visit_fn`. They're factored
        # out here so they don't need to be recalculated all the time.
        self._visiting_param = visiting_param
        self.rng_gen = rng_gen
        self.lower = lb
        self.upper = ub
        self.bound_range = ub - lb

        # these are invariant numbers unless visiting_param changes
        self._factor2 = np.exp((4.0 - self._visiting_param) * np.log(
            self._visiting_param - 1.0))
        self._factor3 = np.exp((2.0 - self._visiting_param) * np.log(2.0)
                               / (self._visiting_param - 1.0))
        self._factor4_p = np.sqrt(np.pi) * self._factor2 / (self._factor3 * (
            3.0 - self._visiting_param))

        self._factor5 = 1.0 / (self._visiting_param - 1.0) - 0.5
        self._d1 = 2.0 - self._factor5
        self._factor6 = np.pi * (1.0 - self._factor5) / np.sin(
            np.pi * (1.0 - self._factor5)) / np.exp(gammaln(self._d1))

    def visiting(self, x, step, temperature):
        """ Based on the step in the strategy chain, new coordinates are
        generated by changing either all components at the same time or
        only one of them; the new values are computed with the `visit_fn`
        method.
        """
        dim = x.size
        if step < dim:
            # Changing all coordinates with a new visiting value
            visits = self.visit_fn(temperature, dim)
            upper_sample, lower_sample = self.rng_gen.uniform(size=2)
            visits[visits > self.TAIL_LIMIT] = self.TAIL_LIMIT * upper_sample
            visits[visits < -self.TAIL_LIMIT] = -self.TAIL_LIMIT * lower_sample
            x_visit = visits + x
            a = x_visit - self.lower
            b = np.fmod(a, self.bound_range) + self.bound_range
            x_visit = np.fmod(b, self.bound_range) + self.lower
            x_visit[np.fabs(
                x_visit - self.lower) < self.MIN_VISIT_BOUND] += 1.e-10
        else:
            # Changing only one coordinate at a time based on strategy
            # chain step
            x_visit = np.copy(x)
            visit = self.visit_fn(temperature, 1)[0]
            if visit > self.TAIL_LIMIT:
                visit = self.TAIL_LIMIT * self.rng_gen.uniform()
            elif visit < -self.TAIL_LIMIT:
                visit = -self.TAIL_LIMIT * self.rng_gen.uniform()
            index = step - dim
            x_visit[index] = visit + x[index]
            a = x_visit[index] - self.lower[index]
            b = np.fmod(a, self.bound_range[index]) + self.bound_range[index]
            x_visit[index] = np.fmod(b, self.bound_range[
                index]) + self.lower[index]
            if np.fabs(x_visit[index] - self.lower[
                    index]) < self.MIN_VISIT_BOUND:
                x_visit[index] += self.MIN_VISIT_BOUND
        return x_visit

    def visit_fn(self, temperature, dim):
        """ Formula Visita from p. 405 of reference [2] """
        x, y = self.rng_gen.normal(size=(dim, 2)).T

        factor1 = np.exp(np.log(temperature) / (self._visiting_param - 1.0))
        factor4 = self._factor4_p * factor1

        # sigmax
        x *= np.exp(-(self._visiting_param - 1.0) * np.log(
            self._factor6 / factor4) / (3.0 - self._visiting_param))

        den = np.exp((self._visiting_param - 1.0) * np.log(np.fabs(y)) /
                     (3.0 - self._visiting_param))

        return x / den
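
# A minimal sketch of drawing visiting steps, assuming numpy's default_rng
# as the generator:
#
#     rng = np.random.default_rng(0)
#     vd = VisitingDistribution(lb=np.array([-5.0]), ub=np.array([5.0]),
#                               visiting_param=2.62, rng_gen=rng)
#     steps = vd.visit_fn(temperature=5230.0, dim=1)
#
# Larger `visiting_param` values fatten the tail of the distribution, so
# occasional very long jumps occur; `visiting` caps them at TAIL_LIMIT.
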
class EnergyState:
    """
    Class used to record the energy state. At any time, it knows the
    currently used coordinates and the most recent best location.

    Parameters
    ----------
    lower : array_like
        A 1-D NumPy ndarray containing lower bounds for generating initial
        random components in the `reset` method.
    upper : array_like
        A 1-D NumPy ndarray containing upper bounds for generating initial
        random components in the `reset` method. Neither NaN nor inf is
        allowed.
    callback : callable, ``callback(x, f, context)``, optional
        A callback function which will be called for all minima found.
        ``x`` and ``f`` are the coordinates and function value of the
        latest minimum found, and `context` has a value in [0, 1, 2]
    """
    # Maximum number of trials for generating a valid starting point
    MAX_REINIT_COUNT = 1000

    def __init__(self, lower, upper, callback=None):
        self.ebest = None
        self.current_energy = None
        self.current_location = None
        self.xbest = None
        self.lower = lower
        self.upper = upper
        self.callback = callback

    def reset(self, func_wrapper, rng_gen, x0=None):
        """
        Initialize the current location in the search domain. If `x0` is not
        provided, a random location within the bounds is generated.
        """
        if x0 is None:
            self.current_location = rng_gen.uniform(self.lower, self.upper,
                                                    size=len(self.lower))
        else:
            self.current_location = np.copy(x0)
        init_error = True
        reinit_counter = 0
        while init_error:
            self.current_energy = func_wrapper.fun(self.current_location)
            if self.current_energy is None:
                raise ValueError('Objective function is returning None')
            if not np.isfinite(self.current_energy):
                if reinit_counter >= EnergyState.MAX_REINIT_COUNT:
                    init_error = False
                    message = (
                        'Stopping algorithm because the function '
                        'creates NaN or (+/-) infinity values even when '
                        'trying new random parameters'
                    )
                    raise ValueError(message)
                self.current_location = rng_gen.uniform(self.lower,
                                                        self.upper,
                                                        size=self.lower.size)
                reinit_counter += 1
            else:
                init_error = False
            # If first time reset, initialize ebest and xbest
            if self.ebest is None and self.xbest is None:
                self.ebest = self.current_energy
                self.xbest = np.copy(self.current_location)
            # Otherwise, we keep them in case of reannealing reset

    def update_best(self, e, x, context):
        self.ebest = e
        self.xbest = np.copy(x)
        if self.callback is not None:
            val = self.callback(x, e, context)
            if val is not None:
                if val:
                    return ('Callback function requested to stop early by '
                            'returning True')

    def update_current(self, e, x):
        self.current_energy = e
        self.current_location = np.copy(x)
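
# A minimal sketch of the reset/retry contract, assuming `func_wrapper` is
# an ObjectiveFunWrapper around an objective that is finite on the bounds:
#
#     es = EnergyState(lower=np.zeros(2), upper=np.ones(2))
#     es.reset(func_wrapper, np.random.default_rng(0))          # random x0
#     es.reset(func_wrapper, np.random.default_rng(0), x0=x0)   # given x0
#
# Non-finite energies trigger up to MAX_REINIT_COUNT fresh random draws
# before a ValueError is raised; ebest/xbest survive reannealing resets.
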
class StrategyChain:
    """
    Class that implements, within a Markov chain, the strategy for location
    acceptance and local search decision making.

    Parameters
    ----------
    acceptance_param : float
        Parameter for acceptance distribution. It is used to control the
        probability of acceptance. The lower the acceptance parameter, the
        smaller the probability of acceptance. Default value is -5.0 with
        a range (-1e4, -5].
    visit_dist : VisitingDistribution
        Instance of `VisitingDistribution` class.
    func_wrapper : ObjectiveFunWrapper
        Instance of `ObjectiveFunWrapper` class.
    minimizer_wrapper : LocalSearchWrapper
        Instance of `LocalSearchWrapper` class.
    rand_gen : {None, int, `numpy.random.Generator`,
                `numpy.random.RandomState`}, optional

        If `seed` is None (or `np.random`), the `numpy.random.RandomState`
        singleton is used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``Generator`` or ``RandomState`` instance then
        that instance is used.
    energy_state : EnergyState
        Instance of `EnergyState` class.

    """

    def __init__(self, acceptance_param, visit_dist, func_wrapper,
                 minimizer_wrapper, rand_gen, energy_state):
        # Local strategy chain minimum energy and location
        self.emin = energy_state.current_energy
        self.xmin = np.array(energy_state.current_location)
        # Global optimizer state
        self.energy_state = energy_state
        # Acceptance parameter
        self.acceptance_param = acceptance_param
        # Visiting distribution instance
        self.visit_dist = visit_dist
        # Wrapper to objective function
        self.func_wrapper = func_wrapper
        # Wrapper to the local minimizer
        self.minimizer_wrapper = minimizer_wrapper
        self.not_improved_idx = 0
        self.not_improved_max_idx = 1000
        self._rand_gen = rand_gen
        self.temperature_step = 0
        self.K = 100 * len(energy_state.current_location)

    def accept_reject(self, j, e, x_visit):
        r = self._rand_gen.uniform()
        pqv_temp = 1.0 - ((1.0 - self.acceptance_param) *
                          (e - self.energy_state.current_energy) /
                          self.temperature_step)
        if pqv_temp <= 0.:
            pqv = 0.
        else:
            pqv = np.exp(np.log(pqv_temp) / (
                1. - self.acceptance_param))

        if r <= pqv:
            # We accept the new location and update state
            self.energy_state.update_current(e, x_visit)
            self.xmin = np.copy(self.energy_state.current_location)

        # No improvement for a long time
        if self.not_improved_idx >= self.not_improved_max_idx:
            if j == 0 or self.energy_state.current_energy < self.emin:
                self.emin = self.energy_state.current_energy
                self.xmin = np.copy(self.energy_state.current_location)

    def run(self, step, temperature):
        self.temperature_step = temperature / float(step + 1)
        self.not_improved_idx += 1
        for j in range(self.energy_state.current_location.size * 2):
            if j == 0:
                if step == 0:
                    self.energy_state_improved = True
                else:
                    self.energy_state_improved = False
            x_visit = self.visit_dist.visiting(
                self.energy_state.current_location, j, temperature)
            # Calling the objective function
            e = self.func_wrapper.fun(x_visit)
            if e < self.energy_state.current_energy:
                # We have got a better energy value
                self.energy_state.update_current(e, x_visit)
                if e < self.energy_state.ebest:
                    val = self.energy_state.update_best(e, x_visit, 0)
                    if val is not None:
                        if val:
                            return val
                    self.energy_state_improved = True
                self.not_improved_idx = 0
            else:
                # We have not improved but do we accept the new location?
                self.accept_reject(j, e, x_visit)
            if self.func_wrapper.nfev >= self.func_wrapper.maxfun:
                return ('Maximum number of function calls reached '
                        'during annealing')
        # End of StrategyChain loop

    def local_search(self):
        # Decision making for performing a local search based on strategy
        # chain results. If the energy has improved, or there has been no
        # improvement for too long, perform a local search from the best
        # strategy chain location.
        if self.energy_state_improved:
            # Global energy has improved, let's see if LS improves further
            e, x = self.minimizer_wrapper.local_search(self.energy_state.xbest,
                                                       self.energy_state.ebest)
            if e < self.energy_state.ebest:
                self.not_improved_idx = 0
                val = self.energy_state.update_best(e, x, 1)
                if val is not None:
                    if val:
                        return val
                self.energy_state.update_current(e, x)
            if self.func_wrapper.nfev >= self.func_wrapper.maxfun:
                return ('Maximum number of function calls reached '
                        'during local search')
        # Check probability of a need to perform a LS even if no improvement
        do_ls = False
        if self.K < 90 * len(self.energy_state.current_location):
            pls = np.exp(self.K * (
                self.energy_state.ebest - self.energy_state.current_energy) /
                self.temperature_step)
            if pls >= self._rand_gen.uniform():
                do_ls = True
        # Global energy not improved, let's see what LS gives
        # on the best strategy chain location
        if self.not_improved_idx >= self.not_improved_max_idx:
            do_ls = True
        if do_ls:
            e, x = self.minimizer_wrapper.local_search(self.xmin, self.emin)
            self.xmin = np.copy(x)
            self.emin = e
            self.not_improved_idx = 0
            self.not_improved_max_idx = self.energy_state.current_location.size
            if e < self.energy_state.ebest:
                val = self.energy_state.update_best(
                    self.emin, self.xmin, 2)
                if val is not None:
                    if val:
                        return val
                self.energy_state.update_current(e, x)
            if self.func_wrapper.nfev >= self.func_wrapper.maxfun:
                return ('Maximum number of function calls reached '
                        'during dual annealing')
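
# The accept_reject step above is the generalized Metropolis rule from the
# dual_annealing docstring; an isolated sketch of the same probability,
# with qa the acceptance parameter (names here are illustrative):
#
#     def acceptance_probability(qa, delta_e, temperature_step):
#         base = 1.0 - (1.0 - qa) * delta_e / temperature_step
#         return 0.0 if base <= 0.0 else base ** (1.0 / (1.0 - qa))
#
# For qa < 1 the probability is clipped to zero whenever the bracketed
# term goes negative, matching the pqv_temp <= 0. branch above.
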
class ObjectiveFunWrapper:

    def __init__(self, func, maxfun=1e7, *args):
        self.func = func
        self.args = args
        # Number of objective function evaluations
        self.nfev = 0
        # Number of gradient function evaluations if used
        self.ngev = 0
        # Number of Hessian evaluations of the objective function if used
        self.nhev = 0
        self.maxfun = maxfun

    def fun(self, x):
        self.nfev += 1
        return self.func(x, *self.args)


class LocalSearchWrapper:
    """
    Class used to wrap around the minimizer used for local search.
    The default local minimizer is SciPy's 'L-BFGS-B' method.
    """

    LS_MAXITER_RATIO = 6
    LS_MAXITER_MIN = 100
    LS_MAXITER_MAX = 1000

    def __init__(self, search_bounds, func_wrapper, *args, **kwargs):
        self.func_wrapper = func_wrapper
        self.kwargs = kwargs
        self.jac = self.kwargs.get('jac', None)
        self.hess = self.kwargs.get('hess', None)
        self.hessp = self.kwargs.get('hessp', None)
        self.kwargs.pop("args", None)
        self.minimizer = minimize
        bounds_list = list(zip(*search_bounds))
        self.lower = np.array(bounds_list[0])
        self.upper = np.array(bounds_list[1])

        # If no minimizer is specified, use SciPy minimize with the
        # 'L-BFGS-B' method
        if not self.kwargs:
            n = len(self.lower)
            ls_max_iter = min(max(n * self.LS_MAXITER_RATIO,
                                  self.LS_MAXITER_MIN),
                              self.LS_MAXITER_MAX)
            self.kwargs['method'] = 'L-BFGS-B'
            self.kwargs['options'] = {
                'maxiter': ls_max_iter,
            }
            self.kwargs['bounds'] = list(zip(self.lower, self.upper))
        else:
            if callable(self.jac):
                def wrapped_jac(x):
                    return self.jac(x, *args)
                self.kwargs['jac'] = wrapped_jac
            if callable(self.hess):
                def wrapped_hess(x):
                    return self.hess(x, *args)
                self.kwargs['hess'] = wrapped_hess
            if callable(self.hessp):
                def wrapped_hessp(x, p):
                    return self.hessp(x, p, *args)
                self.kwargs['hessp'] = wrapped_hessp

    def local_search(self, x, e):
        # Run local search from the given x location where the energy value
        # is e
        x_tmp = np.copy(x)
        mres = self.minimizer(self.func_wrapper.fun, x, **self.kwargs)
        if 'njev' in mres:
            self.func_wrapper.ngev += mres.njev
        if 'nhev' in mres:
            self.func_wrapper.nhev += mres.nhev
        # Check if the result is a valid value
        is_finite = np.all(np.isfinite(mres.x)) and np.isfinite(mres.fun)
        in_bounds = np.all(mres.x >= self.lower) and np.all(
            mres.x <= self.upper)
        is_valid = is_finite and in_bounds

        # Use the new point only if it is valid and returns a better result
        if is_valid and mres.fun < e:
            return mres.fun, mres.x
        else:
            return e, x_tmp

@_transition_to_rng("seed", position_num=10)
|
||||
def dual_annealing(func, bounds, args=(), maxiter=1000,
|
||||
minimizer_kwargs=None, initial_temp=5230.,
|
||||
restart_temp_ratio=2.e-5, visit=2.62, accept=-5.0,
|
||||
maxfun=1e7, rng=None, no_local_search=False,
|
||||
callback=None, x0=None):
|
||||
"""
|
||||
Find the global minimum of a function using Dual Annealing.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
func : callable
|
||||
The objective function to be minimized. Must be in the form
|
||||
``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
|
||||
and ``args`` is a tuple of any additional fixed parameters needed to
|
||||
completely specify the function.
|
||||
bounds : sequence or `Bounds`
|
||||
Bounds for variables. There are two ways to specify the bounds:
|
||||
|
||||
1. Instance of `Bounds` class.
|
||||
2. Sequence of ``(min, max)`` pairs for each element in `x`.
|
||||
|
||||
args : tuple, optional
|
||||
Any additional fixed parameters needed to completely specify the
|
||||
objective function.
|
||||
maxiter : int, optional
|
||||
The maximum number of global search iterations. Default value is 1000.
|
||||
minimizer_kwargs : dict, optional
|
||||
Keyword arguments to be passed to the local minimizer
|
||||
(`minimize`). An important option could be ``method`` for the minimizer
|
||||
method to use.
|
||||
If no keyword arguments are provided, the local minimizer defaults to
|
||||
'L-BFGS-B' and uses the already supplied bounds. If `minimizer_kwargs`
|
||||
is specified, then the dict must contain all parameters required to
|
||||
control the local minimization. `args` is ignored in this dict, as it is
|
||||
passed automatically. `bounds` is not automatically passed on to the
|
||||
local minimizer as the method may not support them.
|
||||
initial_temp : float, optional
|
||||
The initial temperature, use higher values to facilitates a wider
|
||||
search of the energy landscape, allowing dual_annealing to escape
|
||||
local minima that it is trapped in. Default value is 5230. Range is
|
||||
(0.01, 5.e4].
|
||||
restart_temp_ratio : float, optional
|
||||
During the annealing process, temperature is decreasing, when it
|
||||
reaches ``initial_temp * restart_temp_ratio``, the reannealing process
|
||||
is triggered. Default value of the ratio is 2e-5. Range is (0, 1).
|
||||
visit : float, optional
|
||||
Parameter for visiting distribution. Default value is 2.62. Higher
|
||||
values give the visiting distribution a heavier tail, this makes
|
||||
the algorithm jump to a more distant region. The value range is (1, 3].
|
||||
accept : float, optional
|
||||
Parameter for acceptance distribution. It is used to control the
|
||||
probability of acceptance. The lower the acceptance parameter, the
|
||||
smaller the probability of acceptance. Default value is -5.0 with
|
||||
a range (-1e4, -5].
|
||||
maxfun : int, optional
|
||||
Soft limit for the number of objective function calls. If the
|
||||
algorithm is in the middle of a local search, this number will be
|
||||
exceeded, the algorithm will stop just after the local search is
|
||||
done. Default value is 1e7.
|
||||
rng : `numpy.random.Generator`, optional
|
||||
Pseudorandom number generator state. When `rng` is None, a new
|
||||
`numpy.random.Generator` is created using entropy from the
|
||||
operating system. Types other than `numpy.random.Generator` are
|
||||
passed to `numpy.random.default_rng` to instantiate a `Generator`.
|
||||
|
||||
Specify `rng` for repeatable minimizations. The random numbers
|
||||
generated only affect the visiting distribution function
|
||||
and new coordinates generation.
|
||||
no_local_search : bool, optional
|
||||
If `no_local_search` is set to True, a traditional Generalized
|
||||
Simulated Annealing will be performed with no local search
|
||||
strategy applied.
|
||||
callback : callable, optional
|
||||
A callback function with signature ``callback(x, f, context)``,
|
||||
which will be called for all minima found.
|
||||
``x`` and ``f`` are the coordinates and function value of the
|
||||
latest minimum found, and ``context`` has one of the following
|
||||
values:
|
||||
|
||||
- ``0``: minimum detected in the annealing process.
|
||||
- ``1``: detection occurred in the local search process.
|
||||
- ``2``: detection done in the dual annealing process.
|
||||
|
||||
If the callback implementation returns True, the algorithm will stop.
|
||||
x0 : ndarray, shape(n,), optional
|
||||
Coordinates of a single N-D starting point.
|
||||
|
||||
Returns
|
||||
-------
|
||||
res : OptimizeResult
|
||||
The optimization result represented as a `OptimizeResult` object.
|
||||
Important attributes are: ``x`` the solution array, ``fun`` the value
|
||||
of the function at the solution, and ``message`` which describes the
|
||||
cause of the termination.
|
||||
See `OptimizeResult` for a description of other attributes.
|
||||
|
||||
Notes
|
||||
-----
|
||||
This function implements the Dual Annealing optimization. This stochastic
|
||||
approach derived from [3]_ combines the generalization of CSA (Classical
|
||||
Simulated Annealing) and FSA (Fast Simulated Annealing) [1]_ [2]_ coupled
|
||||
to a strategy for applying a local search on accepted locations [4]_.
|
||||
An alternative implementation of this same algorithm is described in [5]_
|
||||
and benchmarks are presented in [6]_. This approach introduces an advanced
|
||||
method to refine the solution found by the generalized annealing
|
||||
process. This algorithm uses a distorted Cauchy-Lorentz visiting
|
||||
distribution, with its shape controlled by the parameter :math:`q_{v}`
|
||||
|
||||
.. math::
|
||||
|
||||
g_{q_{v}}(\\Delta x(t)) \\propto \\frac{ \\
|
||||
\\left[T_{q_{v}}(t) \\right]^{-\\frac{D}{3-q_{v}}}}{ \\
|
||||
\\left[{1+(q_{v}-1)\\frac{(\\Delta x(t))^{2}} { \\
|
||||
\\left[T_{q_{v}}(t)\\right]^{\\frac{2}{3-q_{v}}}}}\\right]^{ \\
|
||||
\\frac{1}{q_{v}-1}+\\frac{D-1}{2}}}
|
||||
|
||||
Where :math:`t` is the artificial time. This visiting distribution is used
|
||||
to generate a trial jump distance :math:`\\Delta x(t)` of variable
|
||||
:math:`x(t)` under artificial temperature :math:`T_{q_{v}}(t)`.
|
||||
|
||||
From the starting point, after calling the visiting distribution
|
||||
function, the acceptance probability is computed as follows:
|
||||
|
||||
.. math::
|
||||
|
||||
p_{q_{a}} = \\min{\\{1,\\left[1-(1-q_{a}) \\beta \\Delta E \\right]^{ \\
|
||||
\\frac{1}{1-q_{a}}}\\}}
|
||||
|
||||
Where :math:`q_{a}` is a acceptance parameter. For :math:`q_{a}<1`, zero
|
||||
acceptance probability is assigned to the cases where
|
||||
|
||||
.. math::
|
||||
|
||||
[1-(1-q_{a}) \\beta \\Delta E] < 0
|
||||
|
||||
The artificial temperature :math:`T_{q_{v}}(t)` is decreased according to
|
||||
|
||||
.. math::
|
||||
|
||||
T_{q_{v}}(t) = T_{q_{v}}(1) \\frac{2^{q_{v}-1}-1}{\\left( \\
|
||||
1 + t\\right)^{q_{v}-1}-1}
|
||||
|
||||
Where :math:`q_{v}` is the visiting parameter.
|
||||
|
||||
.. versionadded:: 1.2.0
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Tsallis C. Possible generalization of Boltzmann-Gibbs
|
||||
statistics. Journal of Statistical Physics, 52, 479-487 (1988).
|
||||
.. [2] Tsallis C, Stariolo DA. Generalized Simulated Annealing.
|
||||
Physica A, 233, 395-406 (1996).
|
||||
.. [3] Xiang Y, Sun DY, Fan W, Gong XG. Generalized Simulated
|
||||
Annealing Algorithm and Its Application to the Thomson Model.
|
||||
Physics Letters A, 233, 216-220 (1997).
|
||||
.. [4] Xiang Y, Gong XG. Efficiency of Generalized Simulated
|
||||
Annealing. Physical Review E, 62, 4473 (2000).
|
||||
.. [5] Xiang Y, Gubian S, Suomela B, Hoeng J. Generalized
|
||||
Simulated Annealing for Efficient Global Optimization: the GenSA
|
||||
Package for R. The R Journal, Volume 5/1 (2013).
|
||||
.. [6] Mullen, K. Continuous Global Optimization in R. Journal of
|
||||
Statistical Software, 60(6), 1 - 45, (2014).
|
||||
:doi:`10.18637/jss.v060.i06`
|
||||
|
||||
Examples
|
||||
--------
|
||||
The following example is a 10-D problem, with many local minima.
|
||||
The function involved is called Rastrigin
|
||||
(https://en.wikipedia.org/wiki/Rastrigin_function)
|
||||
|
||||
>>> import numpy as np
|
||||
>>> from scipy.optimize import dual_annealing
|
||||
>>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
|
||||
>>> lw = [-5.12] * 10
|
||||
>>> up = [5.12] * 10
|
||||
>>> ret = dual_annealing(func, bounds=list(zip(lw, up)))
|
||||
>>> ret.x
|
||||
array([-4.26437714e-09, -3.91699361e-09, -1.86149218e-09, -3.97165720e-09,
|
||||
-6.29151648e-09, -6.53145322e-09, -3.93616815e-09, -6.55623025e-09,
|
||||
-6.05775280e-09, -5.00668935e-09]) # random
|
||||
>>> ret.fun
|
||||
0.000000
|
||||
|
||||
"""

    if isinstance(bounds, Bounds):
        bounds = new_bounds_to_old(bounds.lb, bounds.ub, len(bounds.lb))

    if x0 is not None and not len(x0) == len(bounds):
        raise ValueError('Bounds size does not match x0')

    lu = list(zip(*bounds))
    lower = np.array(lu[0])
    upper = np.array(lu[1])
    # Check that restart temperature ratio is correct
    if restart_temp_ratio <= 0. or restart_temp_ratio >= 1.:
        raise ValueError('Restart temperature ratio has to be in range (0, 1)')
    # Checking bounds are valid
    if (np.any(np.isinf(lower)) or np.any(np.isinf(upper)) or np.any(
            np.isnan(lower)) or np.any(np.isnan(upper))):
        raise ValueError('Some bounds values are inf values or nan values')
    # Checking that bounds are consistent
    if not np.all(lower < upper):
        raise ValueError('Bounds are not consistent min < max')
    # Checking that bounds are the same length
    if not len(lower) == len(upper):
        raise ValueError('Bounds do not have the same dimensions')

    # Wrapper for the objective function
    func_wrapper = ObjectiveFunWrapper(func, maxfun, *args)

    # minimizer_kwargs has to be a dict, not None
    minimizer_kwargs = minimizer_kwargs or {}

    minimizer_wrapper = LocalSearchWrapper(
        bounds, func_wrapper, *args, **minimizer_kwargs)

    # Initialization of the random Generator for reproducible runs if rng provided
    rng_gen = check_random_state(rng)
    # Initialization of the energy state
    energy_state = EnergyState(lower, upper, callback)
    energy_state.reset(func_wrapper, rng_gen, x0)
    # Minimum value of annealing temperature reached to perform
    # re-annealing
    temperature_restart = initial_temp * restart_temp_ratio
    # VisitingDistribution instance
    visit_dist = VisitingDistribution(lower, upper, visit, rng_gen)
    # Strategy chain instance
    strategy_chain = StrategyChain(accept, visit_dist, func_wrapper,
                                   minimizer_wrapper, rng_gen, energy_state)
    need_to_stop = False
    iteration = 0
    message = []
    # OptimizeResult object to be returned
    optimize_res = OptimizeResult()
    optimize_res.success = True
    optimize_res.status = 0

    t1 = np.exp((visit - 1) * np.log(2.0)) - 1.0
    # Run the search loop
    while not need_to_stop:
        for i in range(maxiter):
            # Compute temperature for this step
            s = float(i) + 2.0
            t2 = np.exp((visit - 1) * np.log(s)) - 1.0
            temperature = initial_temp * t1 / t2
            if iteration >= maxiter:
                message.append("Maximum number of iterations reached")
                need_to_stop = True
                break
            # Need a re-annealing process?
            if temperature < temperature_restart:
                energy_state.reset(func_wrapper, rng_gen)
                break
            # starting strategy chain
            val = strategy_chain.run(i, temperature)
            if val is not None:
                message.append(val)
                need_to_stop = True
                optimize_res.success = False
                break
            # Possible local search at the end of the strategy chain
            if not no_local_search:
                val = strategy_chain.local_search()
                if val is not None:
                    message.append(val)
                    need_to_stop = True
                    optimize_res.success = False
                    break
            iteration += 1

    # Setting the OptimizeResult values
    optimize_res.x = energy_state.xbest
    optimize_res.fun = energy_state.ebest
    optimize_res.nit = iteration
    optimize_res.nfev = func_wrapper.nfev
    optimize_res.njev = func_wrapper.ngev
    optimize_res.nhev = func_wrapper.nhev
    optimize_res.message = message
    return optimize_res
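The ``callback(x, f, context)`` hook documented above can stop the search early by returning True. A short sketch (an editor's illustration, not part of the original file; the stopping threshold is arbitrary):

>>> import numpy as np
>>> from scipy.optimize import dual_annealing
>>> def stop_early(x, f, context):
...     # context is 0, 1, or 2 depending on where the minimum was detected
...     return bool(f < 1e-8)  # returning True terminates the run
>>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
>>> res = dual_annealing(func, bounds=[(-5.12, 5.12)] * 4,
...                      callback=stop_early, rng=np.random.default_rng(0))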
798
venv/lib/python3.13/site-packages/scipy/optimize/_elementwise.py
Normal file

@@ -0,0 +1,798 @@
from scipy.optimize._bracket import _bracket_root, _bracket_minimum
from scipy.optimize._chandrupatla import _chandrupatla, _chandrupatla_minimize
from scipy._lib._util import _RichResult


def find_root(f, init, /, *, args=(), tolerances=None, maxiter=None, callback=None):
    """Find the root of a monotonic, real-valued function of a real variable.

    For each element of the output of `f`, `find_root` seeks the scalar
    root that makes the element 0. This function currently uses Chandrupatla's
    bracketing algorithm [1]_ and therefore requires argument `init` to
    provide a bracket around the root: the function values at the two endpoints
    must have opposite signs.

    Provided a valid bracket, `find_root` is guaranteed to converge to a solution
    that satisfies the provided `tolerances` if the function is continuous within
    the bracket.

    This function works elementwise when `init` and `args` contain (broadcastable)
    arrays.

    Parameters
    ----------
    f : callable
        The function whose root is desired. The signature must be::

            f(x: array, *args) -> array

        where each element of ``x`` is a finite real and ``args`` is a tuple,
        which may contain an arbitrary number of arrays that are broadcastable
        with ``x``.

        `f` must be an elementwise function: each element ``f(x)[i]``
        must equal ``f(x[i])`` for all indices ``i``. It must not mutate the
        array ``x`` or the arrays in ``args``.

        `find_root` seeks an array ``x`` such that ``f(x)`` is an array of zeros.
    init : 2-tuple of float array_like
        The lower and upper endpoints of a bracket surrounding the desired root.
        A bracket is valid if arrays ``xl, xr = init`` satisfy ``xl < xr`` and
        ``sign(f(xl)) == -sign(f(xr))`` elementwise. Arrays must be broadcastable
        with one another and `args`.
    args : tuple of array_like, optional
        Additional positional array arguments to be passed to `f`. Arrays
        must be broadcastable with one another and the arrays of `init`.
        If the callable for which the root is desired requires arguments that are
        not broadcastable with `x`, wrap that callable with `f` such that `f`
        accepts only `x` and broadcastable ``*args``.
    tolerances : dictionary of floats, optional
        Absolute and relative tolerances on the root and function value.
        Valid keys of the dictionary are:

        - ``xatol`` - absolute tolerance on the root
        - ``xrtol`` - relative tolerance on the root
        - ``fatol`` - absolute tolerance on the function value
        - ``frtol`` - relative tolerance on the function value

        See Notes for default values and explicit termination conditions.
    maxiter : int, optional
        The maximum number of iterations of the algorithm to perform.
        The default is the maximum possible number of bisections within
        the (normal) floating point numbers of the relevant dtype.
    callback : callable, optional
        An optional user-supplied function to be called before the first
        iteration and after each iteration.
        Called as ``callback(res)``, where ``res`` is a ``_RichResult``
        similar to that returned by `find_root` (but containing the current
        iterate's values of all variables). If `callback` raises a
        ``StopIteration``, the algorithm will terminate immediately and
        `find_root` will return a result. `callback` must not mutate
        `res` or its attributes.

    Returns
    -------
    res : _RichResult
        An object similar to an instance of `scipy.optimize.OptimizeResult` with the
        following attributes. The descriptions are written as though the values will
        be scalars; however, if `f` returns an array, the outputs will be
        arrays of the same shape.

        success : bool array
            ``True`` where the algorithm terminated successfully (status ``0``);
            ``False`` otherwise.
        status : int array
            An integer representing the exit status of the algorithm.

            - ``0`` : The algorithm converged to the specified tolerances.
            - ``-1`` : The initial bracket was invalid.
            - ``-2`` : The maximum number of iterations was reached.
            - ``-3`` : A non-finite value was encountered.
            - ``-4`` : Iteration was terminated by `callback`.
            - ``1`` : The algorithm is proceeding normally (in `callback` only).

        x : float array
            The root of the function, if the algorithm terminated successfully.
        f_x : float array
            The value of `f` evaluated at `x`.
        nfev : int array
            The number of abscissae at which `f` was evaluated to find the root.
            This is distinct from the number of times `f` is *called* because
            the function may be evaluated at multiple points in a single call.
        nit : int array
            The number of iterations of the algorithm that were performed.
        bracket : tuple of float arrays
            The lower and upper endpoints of the final bracket.
        f_bracket : tuple of float arrays
            The value of `f` evaluated at the lower and upper endpoints of the
            bracket.

    Notes
    -----
    Implemented based on Chandrupatla's original paper [1]_.

    Let:

    - ``a, b = init`` be the left and right endpoints of the initial bracket,
    - ``xl`` and ``xr`` be the left and right endpoints of the final bracket,
    - ``xmin = xl if abs(f(xl)) <= abs(f(xr)) else xr`` be the final bracket
      endpoint with the smaller function value, and
    - ``fmin0 = min(f(a), f(b))`` be the minimum of the two values of the
      function evaluated at the initial bracket endpoints.

    Then the algorithm is considered to have converged when

    - ``abs(xr - xl) < xatol + abs(xmin) * xrtol`` or
    - ``f(xmin) <= fatol + abs(fmin0) * frtol``.

    This is equivalent to the termination condition described in [1]_ with
    ``xrtol = 4e-10``, ``xatol = 1e-5``, and ``fatol = frtol = 0``.
    However, the default values of the `tolerances` dictionary are
    ``xatol = 4*tiny``, ``xrtol = 4*eps``, ``frtol = 0``, and ``fatol = tiny``,
    where ``eps`` and ``tiny`` are the precision and smallest normal number
    of the result ``dtype`` of function inputs and outputs.

    References
    ----------

    .. [1] Chandrupatla, Tirupathi R.
        "A new hybrid quadratic/bisection algorithm for finding the zero of a
        nonlinear function without using derivatives".
        Advances in Engineering Software, 28(3), 145-149.
        https://doi.org/10.1016/s0965-9978(96)00051-8

    See Also
    --------
    bracket_root

    Examples
    --------
    Suppose we wish to find the root of the following function.

    >>> def f(x, c=5):
    ...     return x**3 - 2*x - c

    First, we must find a valid bracket. The function is not monotonic,
    but `bracket_root` may be able to provide a bracket.

    >>> from scipy.optimize import elementwise
    >>> res_bracket = elementwise.bracket_root(f, 0)
    >>> res_bracket.success
    True
    >>> res_bracket.bracket
    (2.0, 4.0)

    Indeed, the values of the function at the bracket endpoints have
    opposite signs.

    >>> res_bracket.f_bracket
    (-1.0, 51.0)

    Once we have a valid bracket, `find_root` can be used to provide
    a precise root.

    >>> res_root = elementwise.find_root(f, res_bracket.bracket)
    >>> res_root.x
    2.0945514815423265

    The final bracket is only a few ULPs wide, so the error between
    this value and the true root cannot be much smaller within values
    that are representable in double precision arithmetic.

    >>> import numpy as np
    >>> xl, xr = res_root.bracket
    >>> (xr - xl) / np.spacing(xl)
    2.0
    >>> res_root.f_bracket
    (-8.881784197001252e-16, 9.769962616701378e-15)

    `bracket_root` and `find_root` accept arrays for most arguments.
    For instance, to find the root for a few values of the parameter ``c``
    at once:

    >>> c = np.asarray([3, 4, 5])
    >>> res_bracket = elementwise.bracket_root(f, 0, args=(c,))
    >>> res_bracket.bracket
    (array([1., 1., 2.]), array([2., 2., 4.]))
    >>> res_root = elementwise.find_root(f, res_bracket.bracket, args=(c,))
    >>> res_root.x
    array([1.8932892 , 2.        , 2.09455148])

    """
    def reformat_result(res_in):
        res_out = _RichResult()
        res_out.status = res_in.status
        res_out.success = res_in.success
        res_out.x = res_in.x
        res_out.f_x = res_in.fun
        res_out.nfev = res_in.nfev
        res_out.nit = res_in.nit
        res_out.bracket = (res_in.xl, res_in.xr)
        res_out.f_bracket = (res_in.fl, res_in.fr)
        res_out._order_keys = ['success', 'status', 'x', 'f_x',
                               'nfev', 'nit', 'bracket', 'f_bracket']
        return res_out

    xl, xr = init
    default_tolerances = dict(xatol=None, xrtol=None, fatol=None, frtol=0)
    tolerances = {} if tolerances is None else tolerances
    default_tolerances.update(tolerances)
    tolerances = default_tolerances

    if callable(callback):
        def _callback(res):
            return callback(reformat_result(res))
    else:
        _callback = callback

    res = _chandrupatla(f, xl, xr, args=args, **tolerances,
                        maxiter=maxiter, callback=_callback)
    return reformat_result(res)


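The `tolerances` dictionary accepts any subset of the four keys; omitted entries keep their defaults. A sketch (an editor's illustration) loosening only the absolute tolerance on the root, using the bracket from the example above:

>>> from scipy.optimize import elementwise
>>> def f(x, c=5):
...     return x**3 - 2*x - c
>>> res = elementwise.find_root(f, (2.0, 4.0), tolerances=dict(xatol=1e-3))
>>> res.success
True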
def find_minimum(f, init, /, *, args=(), tolerances=None, maxiter=100, callback=None):
    """Find the minimum of an unimodal, real-valued function of a real variable.

    For each element of the output of `f`, `find_minimum` seeks the scalar minimizer
    that minimizes the element. This function currently uses Chandrupatla's
    bracketing minimization algorithm [1]_ and therefore requires argument `init`
    to provide a three-point minimization bracket: ``x1 < x2 < x3`` such that
    ``f(x1) >= f(x2) <= f(x3)``, where one of the inequalities is strict.

    Provided a valid bracket, `find_minimum` is guaranteed to converge to a local
    minimum that satisfies the provided `tolerances` if the function is continuous
    within the bracket.

    This function works elementwise when `init` and `args` contain (broadcastable)
    arrays.

    Parameters
    ----------
    f : callable
        The function whose minimizer is desired. The signature must be::

            f(x: array, *args) -> array

        where each element of ``x`` is a finite real and ``args`` is a tuple,
        which may contain an arbitrary number of arrays that are broadcastable
        with ``x``.

        `f` must be an elementwise function: each element ``f(x)[i]``
        must equal ``f(x[i])`` for all indices ``i``. It must not mutate the
        array ``x`` or the arrays in ``args``.

        `find_minimum` seeks an array ``x`` such that ``f(x)`` is an array of
        local minima.
    init : 3-tuple of float array_like
        The abscissae of a standard scalar minimization bracket. A bracket is
        valid if arrays ``x1, x2, x3 = init`` satisfy ``x1 < x2 < x3`` and
        ``f(x1) >= f(x2) <= f(x3)``, where one of the inequalities
        is strict. Arrays must be broadcastable with one another and the arrays
        of `args`.
    args : tuple of array_like, optional
        Additional positional array arguments to be passed to `f`. Arrays
        must be broadcastable with one another and the arrays of `init`.
        If the callable for which the minimizer is desired requires arguments
        that are not broadcastable with `x`, wrap that callable with `f` such
        that `f` accepts only `x` and broadcastable ``*args``.
    tolerances : dictionary of floats, optional
        Absolute and relative tolerances on the minimizer and function value.
        Valid keys of the dictionary are:

        - ``xatol`` - absolute tolerance on the minimizer
        - ``xrtol`` - relative tolerance on the minimizer
        - ``fatol`` - absolute tolerance on the function value
        - ``frtol`` - relative tolerance on the function value

        See Notes for default values and explicit termination conditions.
    maxiter : int, default: 100
        The maximum number of iterations of the algorithm to perform.
    callback : callable, optional
        An optional user-supplied function to be called before the first
        iteration and after each iteration.
        Called as ``callback(res)``, where ``res`` is a ``_RichResult``
        similar to that returned by `find_minimum` (but containing the current
        iterate's values of all variables). If `callback` raises a
        ``StopIteration``, the algorithm will terminate immediately and
        `find_minimum` will return a result. `callback` must not mutate
        `res` or its attributes.

    Returns
    -------
    res : _RichResult
        An object similar to an instance of `scipy.optimize.OptimizeResult` with the
        following attributes. The descriptions are written as though the values will
        be scalars; however, if `f` returns an array, the outputs will be
        arrays of the same shape.

        success : bool array
            ``True`` where the algorithm terminated successfully (status ``0``);
            ``False`` otherwise.
        status : int array
            An integer representing the exit status of the algorithm.

            - ``0`` : The algorithm converged to the specified tolerances.
            - ``-1`` : The algorithm encountered an invalid bracket.
            - ``-2`` : The maximum number of iterations was reached.
            - ``-3`` : A non-finite value was encountered.
            - ``-4`` : Iteration was terminated by `callback`.
            - ``1`` : The algorithm is proceeding normally (in `callback` only).

        x : float array
            The minimizer of the function, if the algorithm terminated successfully.
        f_x : float array
            The value of `f` evaluated at `x`.
        nfev : int array
            The number of abscissae at which `f` was evaluated to find the
            minimizer. This is distinct from the number of times `f` is *called*
            because the function may be evaluated at multiple points in a single
            call.
        nit : int array
            The number of iterations of the algorithm that were performed.
        bracket : tuple of float arrays
            The final three-point bracket.
        f_bracket : tuple of float arrays
            The value of `f` evaluated at the bracket points.

    Notes
    -----
    Implemented based on Chandrupatla's original paper [1]_.

    If ``xl < xm < xr`` are the points of the bracket and ``fl >= fm <= fr``
    (where one of the inequalities is strict) are the values of `f` evaluated
    at those points, then the algorithm is considered to have converged when:

    - ``abs(xr - xm)/2 <= abs(xm)*xrtol + xatol`` or
    - ``(fl - 2*fm + fr)/2 <= abs(fm)*frtol + fatol``.

    The default value of `xrtol` is the square root of the precision of the
    appropriate dtype, and ``xatol = fatol = frtol`` is the smallest normal
    number of the appropriate dtype.

    References
    ----------

    .. [1] Chandrupatla, Tirupathi R. (1998).
        "An efficient quadratic fit-sectioning algorithm for minimization
        without derivatives".
        Computer Methods in Applied Mechanics and Engineering, 152 (1-2),
        211-217. https://doi.org/10.1016/S0045-7825(97)00190-4

    See Also
    --------
    bracket_minimum

    Examples
    --------
    Suppose we wish to minimize the following function.

    >>> def f(x, c=1):
    ...     return (x - c)**2 + 2

    First, we must find a valid bracket. The function is unimodal,
    so `bracket_minimum` will easily find a bracket.

    >>> from scipy.optimize import elementwise
    >>> res_bracket = elementwise.bracket_minimum(f, 0)
    >>> res_bracket.success
    True
    >>> res_bracket.bracket
    (0.0, 0.5, 1.5)

    Indeed, the bracket points are ordered and the function value
    at the middle bracket point is less than at the surrounding
    points.

    >>> xl, xm, xr = res_bracket.bracket
    >>> fl, fm, fr = res_bracket.f_bracket
    >>> (xl < xm < xr) and (fl > fm <= fr)
    True

    Once we have a valid bracket, `find_minimum` can be used to provide
    an estimate of the minimizer.

    >>> res_minimum = elementwise.find_minimum(f, res_bracket.bracket)
    >>> res_minimum.x
    1.0000000149011612

    The function value changes by only a few ULPs within the bracket, so
    the minimizer cannot be determined much more precisely by evaluating
    the function alone (i.e. we would need its derivative to do better).

    >>> import numpy as np
    >>> fl, fm, fr = res_minimum.f_bracket
    >>> (fl - fm) / np.spacing(fm), (fr - fm) / np.spacing(fm)
    (0.0, 2.0)

    Therefore, a precise minimum of the function is given by:

    >>> res_minimum.f_x
    2.0

    `bracket_minimum` and `find_minimum` accept arrays for most arguments.
    For instance, to find the minimizers and minima for a few values of the
    parameter ``c`` at once:

    >>> c = np.asarray([1, 1.5, 2])
    >>> res_bracket = elementwise.bracket_minimum(f, 0, args=(c,))
    >>> res_bracket.bracket
    (array([0. , 0.5, 0.5]), array([0.5, 1.5, 1.5]), array([1.5, 2.5, 2.5]))
    >>> res_minimum = elementwise.find_minimum(f, res_bracket.bracket, args=(c,))
    >>> res_minimum.x
    array([1.00000001, 1.5       , 2.        ])
    >>> res_minimum.f_x
    array([2., 2., 2.])

    """
    def reformat_result(res_in):
        res_out = _RichResult()
        res_out.status = res_in.status
        res_out.success = res_in.success
        res_out.x = res_in.x
        res_out.f_x = res_in.fun
        res_out.nfev = res_in.nfev
        res_out.nit = res_in.nit
        res_out.bracket = (res_in.xl, res_in.xm, res_in.xr)
        res_out.f_bracket = (res_in.fl, res_in.fm, res_in.fr)
        res_out._order_keys = ['success', 'status', 'x', 'f_x',
                               'nfev', 'nit', 'bracket', 'f_bracket']
        return res_out

    xl, xm, xr = init
    default_tolerances = dict(xatol=None, xrtol=None, fatol=None, frtol=None)
    tolerances = {} if tolerances is None else tolerances
    default_tolerances.update(tolerances)
    tolerances = default_tolerances

    if callable(callback):
        def _callback(res):
            return callback(reformat_result(res))
    else:
        _callback = callback

    res = _chandrupatla_minimize(f, xl, xm, xr, args=args, **tolerances,
                                 maxiter=maxiter, callback=_callback)
    return reformat_result(res)


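The `callback` hook can also be used purely to observe the iterates; per the docstring it is invoked once before the first iteration and once after each. A sketch (an editor's illustration):

>>> from scipy.optimize import elementwise
>>> def f(x):
...     return (x - 1)**2 + 2
>>> history = []
>>> def cb(res):
...     history.append(float(res.x))  # read only; callback must not mutate res
>>> res = elementwise.find_minimum(f, (0.0, 0.5, 1.5), callback=cb)

`history` then holds one entry per invocation; raising ``StopIteration`` inside `cb` would instead end the run with status ``-4``.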
def bracket_root(f, xl0, xr0=None, *, xmin=None, xmax=None, factor=None, args=(),
                 maxiter=1000):
    """Bracket the root of a monotonic, real-valued function of a real variable.

    For each element of the output of `f`, `bracket_root` seeks the scalar
    bracket endpoints ``xl`` and ``xr`` such that ``sign(f(xl)) == -sign(f(xr))``
    elementwise.

    The function is guaranteed to find a valid bracket if the function is monotonic,
    but it may find a bracket under other conditions.

    This function works elementwise when `xl0`, `xr0`, `xmin`, `xmax`, `factor`, and
    the elements of `args` are (mutually broadcastable) arrays.

    Parameters
    ----------
    f : callable
        The function for which the root is to be bracketed. The signature must be::

            f(x: array, *args) -> array

        where each element of ``x`` is a finite real and ``args`` is a tuple,
        which may contain an arbitrary number of arrays that are broadcastable
        with ``x``.

        `f` must be an elementwise function: each element ``f(x)[i]``
        must equal ``f(x[i])`` for all indices ``i``. It must not mutate the
        array ``x`` or the arrays in ``args``.
    xl0, xr0: float array_like
        Starting guess of bracket, which need not contain a root. If `xr0` is
        not provided, ``xr0 = xl0 + 1``. Must be broadcastable with all other
        array inputs.
    xmin, xmax : float array_like, optional
        Minimum and maximum allowable endpoints of the bracket, inclusive. Must
        be broadcastable with all other array inputs.
    factor : float array_like, default: 2
        The factor used to grow the bracket. See Notes.
    args : tuple of array_like, optional
        Additional positional array arguments to be passed to `f`.
        If the callable for which the root is desired requires arguments that are
        not broadcastable with `x`, wrap that callable with `f` such that `f`
        accepts only `x` and broadcastable ``*args``.
    maxiter : int, default: 1000
        The maximum number of iterations of the algorithm to perform.

    Returns
    -------
    res : _RichResult
        An object similar to an instance of `scipy.optimize.OptimizeResult` with the
        following attributes. The descriptions are written as though the values will
        be scalars; however, if `f` returns an array, the outputs will be
        arrays of the same shape.

        success : bool array
            ``True`` where the algorithm terminated successfully (status ``0``);
            ``False`` otherwise.
        status : int array
            An integer representing the exit status of the algorithm.

            - ``0`` : The algorithm produced a valid bracket.
            - ``-1`` : The bracket expanded to the allowable limits without success.
            - ``-2`` : The maximum number of iterations was reached.
            - ``-3`` : A non-finite value was encountered.
            - ``-4`` : Iteration was terminated by `callback`.
            - ``-5`` : The initial bracket does not satisfy
              ``xmin <= xl0 < xr0 <= xmax``.

        bracket : 2-tuple of float arrays
            The lower and upper endpoints of the bracket, if the algorithm
            terminated successfully.
        f_bracket : 2-tuple of float arrays
            The values of `f` evaluated at the endpoints of ``res.bracket``,
            respectively.
        nfev : int array
            The number of abscissae at which `f` was evaluated to bracket the
            root. This is distinct from the number of times `f` is *called*
            because the function may be evaluated at multiple points in a single
            call.
        nit : int array
            The number of iterations of the algorithm that were performed.

    Notes
    -----
    This function generalizes an algorithm found in pieces throughout the
    `scipy.stats` codebase. The strategy is to iteratively grow the bracket `(l, r)`
    until ``f(l) < 0 < f(r)`` or ``f(r) < 0 < f(l)``. The bracket grows to the left
    as follows.

    - If `xmin` is not provided, the distance between `xl0` and `l` is iteratively
      increased by `factor`.
    - If `xmin` is provided, the distance between `xmin` and `l` is iteratively
      decreased by `factor`. Note that this also *increases* the bracket size.

    Growth of the bracket to the right is analogous; a sketch of the leftward
    rule follows the function below.

    Growth of the bracket in one direction stops when the endpoint is no longer
    finite, the function value at the endpoint is no longer finite, or the
    endpoint reaches its limiting value (`xmin` or `xmax`). Iteration terminates
    when the bracket stops growing in both directions, the bracket surrounds
    the root, or a root is found (by chance).

    If two brackets are found (that is, a bracket is found on both sides in
    the same iteration), the smaller of the two is returned.

    If roots of the function are found, both `xl` and `xr` are set to the
    leftmost root.

    See Also
    --------
    find_root

    Examples
    --------
    Suppose we wish to find the root of the following function.

    >>> def f(x, c=5):
    ...     return x**3 - 2*x - c

    First, we must find a valid bracket. The function is not monotonic,
    but `bracket_root` may be able to provide a bracket.

    >>> from scipy.optimize import elementwise
    >>> res_bracket = elementwise.bracket_root(f, 0)
    >>> res_bracket.success
    True
    >>> res_bracket.bracket
    (2.0, 4.0)

    Indeed, the values of the function at the bracket endpoints have
    opposite signs.

    >>> res_bracket.f_bracket
    (-1.0, 51.0)

    Once we have a valid bracket, `find_root` can be used to provide
    a precise root.

    >>> res_root = elementwise.find_root(f, res_bracket.bracket)
    >>> res_root.x
    2.0945514815423265

    `bracket_root` and `find_root` accept arrays for most arguments.
    For instance, to find the root for a few values of the parameter ``c``
    at once:

    >>> import numpy as np
    >>> c = np.asarray([3, 4, 5])
    >>> res_bracket = elementwise.bracket_root(f, 0, args=(c,))
    >>> res_bracket.bracket
    (array([1., 1., 2.]), array([2., 2., 4.]))
    >>> res_root = elementwise.find_root(f, res_bracket.bracket, args=(c,))
    >>> res_root.x
    array([1.8932892 , 2.        , 2.09455148])

    """  # noqa: E501
    res = _bracket_root(f, xl0, xr0=xr0, xmin=xmin, xmax=xmax, factor=factor,
                        args=args, maxiter=maxiter)
    res.bracket = res.xl, res.xr
    res.f_bracket = res.fl, res.fr
    del res.xl
    del res.xr
    del res.fl
    del res.fr
    return res


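The growth rule described in the Notes above can be written out directly. A sketch (an editor's illustration of the documented strategy, not the private implementation) of one leftward step:

>>> def grow_left(l, xl0, factor=2.0, xmin=None):
...     if xmin is None:
...         # unbounded: move the endpoint away from xl0 geometrically
...         return xl0 - factor * (xl0 - l)
...     # bounded: approach xmin geometrically, which still widens the bracket
...     return xmin + (l - xmin) / factor
>>> grow_left(-1.0, 0.0)             # distance from xl0 doubles: -1 -> -2
-2.0
>>> grow_left(-1.0, 0.0, xmin=-1.5)  # distance from xmin halves: -1 -> -1.25
-1.25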
def bracket_minimum(f, xm0, *, xl0=None, xr0=None, xmin=None, xmax=None,
                    factor=None, args=(), maxiter=1000):
    """Bracket the minimum of a unimodal, real-valued function of a real variable.

    For each element of the output of `f`, `bracket_minimum` seeks the scalar
    bracket points ``xl < xm < xr`` such that ``fl >= fm <= fr`` where one of the
    inequalities is strict.

    The function is guaranteed to find a valid bracket if the function is
    strongly unimodal, but it may find a bracket under other conditions.

    This function works elementwise when `xm0`, `xl0`, `xr0`, `xmin`, `xmax`, `factor`,
    and the elements of `args` are (mutually broadcastable) arrays.

    Parameters
    ----------
    f : callable
        The function for which the minimum is to be bracketed. The signature must be::

            f(x: array, *args) -> array

        where each element of ``x`` is a finite real and ``args`` is a tuple,
        which may contain an arbitrary number of arrays that are broadcastable
        with ``x``.

        `f` must be an elementwise function: each element ``f(x)[i]``
        must equal ``f(x[i])`` for all indices ``i``. It must not mutate the
        array ``x`` or the arrays in ``args``.
    xm0: float array_like
        Starting guess for the middle point of the bracket.
    xl0, xr0: float array_like, optional
        Starting guesses for the left and right endpoints of the bracket. Must
        be broadcastable with all other array inputs.
    xmin, xmax : float array_like, optional
        Minimum and maximum allowable endpoints of the bracket, inclusive. Must
        be broadcastable with all other array inputs.
    factor : float array_like, default: 2
        The factor used to grow the bracket. See Notes.
    args : tuple of array_like, optional
        Additional positional array arguments to be passed to `f`.
        If the callable for which the minimum is desired requires arguments that
        are not broadcastable with `x`, wrap that callable with `f` such that `f`
        accepts only `x` and broadcastable ``*args``.
    maxiter : int, default: 1000
        The maximum number of iterations of the algorithm to perform.

    Returns
    -------
    res : _RichResult
        An object similar to an instance of `scipy.optimize.OptimizeResult` with the
        following attributes. The descriptions are written as though the values will
        be scalars; however, if `f` returns an array, the outputs will be
        arrays of the same shape.

        success : bool array
            ``True`` where the algorithm terminated successfully (status ``0``);
            ``False`` otherwise.
        status : int array
            An integer representing the exit status of the algorithm.

            - ``0`` : The algorithm produced a valid bracket.
            - ``-1`` : The bracket expanded to the allowable limits. Assuming
              unimodality, this implies the endpoint at the limit is a minimizer.
            - ``-2`` : The maximum number of iterations was reached.
            - ``-3`` : A non-finite value was encountered.
            - ``-4`` : Iteration was terminated by `callback`.
            - ``-5`` : The initial bracket does not satisfy
              ``xmin <= xl0 < xm0 < xr0 <= xmax``.

        bracket : 3-tuple of float arrays
            The left, middle, and right points of the bracket, if the algorithm
            terminated successfully.
        f_bracket : 3-tuple of float arrays
            The function value at the left, middle, and right points of the bracket.
        nfev : int array
            The number of abscissae at which `f` was evaluated to bracket the
            minimum. This is distinct from the number of times `f` is *called*
            because the function may be evaluated at multiple points in a single
            call.
        nit : int array
            The number of iterations of the algorithm that were performed.

    Notes
    -----
    Similar to `scipy.optimize.bracket`, this function seeks to find real
    points ``xl < xm < xr`` such that ``f(xl) >= f(xm)`` and ``f(xr) >= f(xm)``,
    where at least one of the inequalities is strict. Unlike `scipy.optimize.bracket`,
    this function can operate in a vectorized manner on array input, so long as
    the input arrays are broadcastable with each other. Also unlike
    `scipy.optimize.bracket`, users may specify minimum and maximum endpoints
    for the desired bracket.

    Given an initial trio of points ``xl = xl0``, ``xm = xm0``, ``xr = xr0``,
    the algorithm checks if these points already give a valid bracket. If not,
    a new endpoint ``w`` is chosen in the "downhill" direction, ``xm`` becomes the new
    opposite endpoint, and either `xl` or `xr` becomes the new middle point,
    depending on which direction is downhill. The algorithm repeats from here.

    The new endpoint `w` is chosen differently depending on whether or not a
    boundary `xmin` or `xmax` has been set in the downhill direction. Without
    loss of generality, suppose the downhill direction is to the right, so that
    ``f(xl) > f(xm) > f(xr)``. If there is no boundary to the right, then `w`
    is chosen to be ``xr + factor * (xr - xm)`` where `factor` is controlled by
    the user (defaults to 2.0) so that step sizes increase in geometric proportion.
    If there is a boundary, `xmax` in this case, then `w` is chosen to be
    ``xmax - (xmax - xr)/factor``, with steps slowing to a stop at
    `xmax`. This cautious approach ensures that a minimum near but distinct from
    the boundary isn't missed, while also detecting whether or not `xmax` is
    a minimizer when `xmax` is reached after a finite number of steps.

    See Also
    --------
    scipy.optimize.bracket
    scipy.optimize.elementwise.find_minimum

    Examples
    --------
    Suppose we wish to minimize the following function.

    >>> def f(x, c=1):
    ...     return (x - c)**2 + 2

    First, we must find a valid bracket. The function is unimodal,
    so `bracket_minimum` will easily find a bracket.

    >>> from scipy.optimize import elementwise
    >>> res_bracket = elementwise.bracket_minimum(f, 0)
    >>> res_bracket.success
    True
    >>> res_bracket.bracket
    (0.0, 0.5, 1.5)

    Indeed, the bracket points are ordered and the function value
    at the middle bracket point is less than at the surrounding
    points.

    >>> xl, xm, xr = res_bracket.bracket
    >>> fl, fm, fr = res_bracket.f_bracket
    >>> (xl < xm < xr) and (fl > fm <= fr)
    True

    Once we have a valid bracket, `find_minimum` can be used to provide
    an estimate of the minimizer.

    >>> res_minimum = elementwise.find_minimum(f, res_bracket.bracket)
    >>> res_minimum.x
    1.0000000149011612

    `bracket_minimum` and `find_minimum` accept arrays for most arguments.
    For instance, to find the minimizers and minima for a few values of the
    parameter ``c`` at once:

    >>> import numpy as np
    >>> c = np.asarray([1, 1.5, 2])
    >>> res_bracket = elementwise.bracket_minimum(f, 0, args=(c,))
    >>> res_bracket.bracket
    (array([0. , 0.5, 0.5]), array([0.5, 1.5, 1.5]), array([1.5, 2.5, 2.5]))
    >>> res_minimum = elementwise.find_minimum(f, res_bracket.bracket, args=(c,))
    >>> res_minimum.x
    array([1.00000001, 1.5       , 2.        ])
    >>> res_minimum.f_x
    array([2., 2., 2.])

    """  # noqa: E501
    res = _bracket_minimum(f, xm0, xl0=xl0, xr0=xr0, xmin=xmin, xmax=xmax,
                           factor=factor, args=args, maxiter=maxiter)
    res.bracket = res.xl, res.xm, res.xr
    res.f_bracket = res.fl, res.fm, res.fr
    del res.xl
    del res.xm
    del res.xr
    del res.fl
    del res.fm
    del res.fr
    return res
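When the true minimizer lies beyond a supplied limit, the documented status ``-1`` indicates that the endpoint at the limit is itself the minimizer under the unimodality assumption. A sketch (an editor's illustration; the expected output follows from the status table above):

>>> from scipy.optimize import elementwise
>>> def f(x, c=1):
...     return (x - c)**2 + 2
>>> res = elementwise.bracket_minimum(f, 0, xmin=-3, xmax=0.5, args=(5,))
>>> res.success
False
>>> res.status
-1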
Binary file not shown.

@@ -0,0 +1,479 @@
"""Hessian update strategies for quasi-Newton optimization methods."""
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
from scipy.linalg import get_blas_funcs, issymmetric
|
||||
from warnings import warn
|
||||
|
||||
|
||||
__all__ = ['HessianUpdateStrategy', 'BFGS', 'SR1']
|
||||
|
||||
|
||||
class HessianUpdateStrategy:
    """Interface for implementing Hessian update strategies.

    Many optimization methods make use of Hessian (or inverse Hessian)
    approximations, such as the quasi-Newton methods BFGS, SR1, L-BFGS.
    Some of these approximations, however, do not actually need to store
    the entire matrix or can compute the internal matrix product with a
    given vector in a very efficient manner. This class serves as an
    abstract interface between the optimization algorithm and the
    quasi-Newton update strategies, giving freedom of implementation
    to store and update the internal matrix as efficiently as possible.
    Different choices of initialization and update procedure will result
    in different quasi-Newton strategies.

    Four methods should be implemented in derived classes: ``initialize``,
    ``update``, ``dot`` and ``get_matrix``. The matrix multiplication
    operator ``@`` is also defined to call the ``dot`` method.

    Notes
    -----
    Any instance of a class that implements this interface
    can be accepted by the method ``minimize`` and used by
    the compatible solvers to approximate the Hessian (or
    inverse Hessian) used by the optimization algorithms.
    """

    def initialize(self, n, approx_type):
        """Initialize internal matrix.

        Allocate internal memory for storing and updating
        the Hessian or its inverse.

        Parameters
        ----------
        n : int
            Problem dimension.
        approx_type : {'hess', 'inv_hess'}
            Selects either the Hessian or the inverse Hessian.
            When set to 'hess' the Hessian will be stored and updated.
            When set to 'inv_hess' its inverse will be used instead.
        """
        raise NotImplementedError("The method ``initialize(n, approx_type)``"
                                  " is not implemented.")

    def update(self, delta_x, delta_grad):
        """Update internal matrix.

        Update Hessian matrix or its inverse (depending on how 'approx_type'
        is defined) using information about the last evaluated points.

        Parameters
        ----------
        delta_x : ndarray
            The difference between two points the gradient
            function has been evaluated at: ``delta_x = x2 - x1``.
        delta_grad : ndarray
            The difference between the gradients:
            ``delta_grad = grad(x2) - grad(x1)``.
        """
        raise NotImplementedError("The method ``update(delta_x, delta_grad)``"
                                  " is not implemented.")

    def dot(self, p):
        """Compute the product of the internal matrix with the given vector.

        Parameters
        ----------
        p : array_like
            1-D array representing a vector.

        Returns
        -------
        Hp : array
            1-D array representing the result of multiplying the
            approximation matrix by vector p.
        """
        raise NotImplementedError("The method ``dot(p)``"
                                  " is not implemented.")

    def get_matrix(self):
        """Return current internal matrix.

        Returns
        -------
        H : ndarray, shape (n, n)
            Dense matrix containing either the Hessian
            or its inverse (depending on how 'approx_type'
            is defined).
        """
        raise NotImplementedError("The method ``get_matrix()``"
                                  " is not implemented.")

    def __matmul__(self, p):
        return self.dot(p)


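A minimal concrete strategy makes the interface above tangible. This is an editor's sketch (not part of the original file): it keeps a scaled identity as the (inverse) Hessian approximation, rescaled from the latest curvature pair.

class ScaledIdentity(HessianUpdateStrategy):
    """Toy strategy: the (inverse) Hessian approximation is c * I."""

    def initialize(self, n, approx_type):
        self.n = n
        self.approx_type = approx_type
        self.c = 1.0

    def update(self, delta_x, delta_grad):
        sy = np.dot(delta_x, delta_grad)
        ss = np.dot(delta_x, delta_x)
        if sy > 0 and ss > 0:
            ratio = sy / ss  # Rayleigh-quotient curvature estimate
            self.c = ratio if self.approx_type == 'hess' else 1.0 / ratio

    def dot(self, p):
        return self.c * np.asarray(p)

    def get_matrix(self):
        return self.c * np.eye(self.n)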
class FullHessianUpdateStrategy(HessianUpdateStrategy):
    """Hessian update strategy with full dimensional internal representation.
    """
    _syr = get_blas_funcs('syr', dtype='d')  # Symmetric rank 1 update
    _syr2 = get_blas_funcs('syr2', dtype='d')  # Symmetric rank 2 update
    # Symmetric matrix-vector product
    _symv = get_blas_funcs('symv', dtype='d')

    def __init__(self, init_scale='auto'):
        self.init_scale = init_scale
        # Until initialize is called we can't really use the class,
        # so it makes sense to set everything to None.
        self.first_iteration = None
        self.approx_type = None
        self.B = None
        self.H = None

    def initialize(self, n, approx_type):
        """Initialize internal matrix.

        Allocate internal memory for storing and updating
        the Hessian or its inverse.

        Parameters
        ----------
        n : int
            Problem dimension.
        approx_type : {'hess', 'inv_hess'}
            Selects either the Hessian or the inverse Hessian.
            When set to 'hess' the Hessian will be stored and updated.
            When set to 'inv_hess' its inverse will be used instead.
        """
        self.first_iteration = True
        self.n = n
        self.approx_type = approx_type
        if approx_type not in ('hess', 'inv_hess'):
            raise ValueError("`approx_type` must be 'hess' or 'inv_hess'.")
        # Create matrix
        if self.approx_type == 'hess':
            self.B = np.eye(n, dtype=float)
        else:
            self.H = np.eye(n, dtype=float)

    def _auto_scale(self, delta_x, delta_grad):
        # Heuristic to scale matrix at first iteration.
        # Described in Nocedal and Wright "Numerical Optimization"
        # p.143 formula (6.20).
        s_norm2 = np.dot(delta_x, delta_x)
        y_norm2 = np.dot(delta_grad, delta_grad)
        ys = np.abs(np.dot(delta_grad, delta_x))
        if ys == 0.0 or y_norm2 == 0 or s_norm2 == 0:
            return 1
        if self.approx_type == 'hess':
            return y_norm2 / ys
        else:
            return ys / y_norm2

    def _update_implementation(self, delta_x, delta_grad):
        raise NotImplementedError("The method ``_update_implementation``"
                                  " is not implemented.")

    def update(self, delta_x, delta_grad):
        """Update internal matrix.

        Update Hessian matrix or its inverse (depending on how 'approx_type'
        is defined) using information about the last evaluated points.

        Parameters
        ----------
        delta_x : ndarray
            The difference between two points the gradient
            function has been evaluated at: ``delta_x = x2 - x1``.
        delta_grad : ndarray
            The difference between the gradients:
            ``delta_grad = grad(x2) - grad(x1)``.
        """
        if np.all(delta_x == 0.0):
            return
        if np.all(delta_grad == 0.0):
            warn('delta_grad == 0.0. Check if the approximated '
                 'function is linear. If the function is linear '
                 'better results can be obtained by defining the '
                 'Hessian as zero instead of using quasi-Newton '
                 'approximations.',
                 UserWarning, stacklevel=2)
            return
        if self.first_iteration:
            # Get user-specified scale
            if isinstance(self.init_scale, str) and self.init_scale == "auto":
                scale = self._auto_scale(delta_x, delta_grad)
            else:
                scale = self.init_scale

            # Check for complex: numpy will silently cast a complex array to
            # a real one, but not so for a scalar, where it raises a TypeError.
            # Checking here brings a consistent behavior.
            replace = False
            if np.size(scale) == 1:
                # to account for the legacy behavior having the exact same cast
                scale = float(scale)
            elif np.iscomplexobj(scale):
                raise TypeError("init_scale contains complex elements, "
                                "must be real.")
            else:  # test explicitly for allowed shapes and values
                replace = True
                if self.approx_type == 'hess':
                    shape = np.shape(self.B)
                    dtype = self.B.dtype
                else:
                    shape = np.shape(self.H)
                    dtype = self.H.dtype
                # copy, will replace the original
                scale = np.array(scale, dtype=dtype, copy=True)

                # it has to match the shape of the matrix for the
                # multiplication, no implicit broadcasting is allowed
                if shape != (init_shape := np.shape(scale)):
                    raise ValueError("If init_scale is an array, it must have the "
                                     f"dimensions of the hess/inv_hess: {shape}."
                                     f" Got {init_shape}.")
                if not issymmetric(scale):
                    raise ValueError("If init_scale is an array, it must be"
                                     " symmetric (passing scipy.linalg.issymmetric)"
                                     " to be an approximation of a hess/inv_hess.")

            # Scale the initial matrix by ``scale * np.eye(n)``, or replace it
            # entirely when a full array was supplied. (Assigning the scale
            # directly in initialize would be cleaner, but the 'auto' heuristic
            # needs the first ``(delta_x, delta_grad)`` pair, which is not
            # available until ``update`` is called.)
            if self.approx_type == 'hess':
                if replace:
                    self.B = scale
                else:
                    self.B *= scale
            else:
                if replace:
                    self.H = scale
                else:
                    self.H *= scale
            self.first_iteration = False
        self._update_implementation(delta_x, delta_grad)

    def dot(self, p):
        """Compute the product of the internal matrix with the given vector.

        Parameters
        ----------
        p : array_like
            1-D array representing a vector.

        Returns
        -------
        Hp : array
            1-D array representing the result of multiplying the
            approximation matrix by vector p.
        """
        if self.approx_type == 'hess':
            return self._symv(1, self.B, p)
        else:
            return self._symv(1, self.H, p)

    def get_matrix(self):
        """Return the current internal matrix.

        Returns
        -------
        M : ndarray, shape (n, n)
            Dense matrix containing either the Hessian or its inverse
            (depending on how `approx_type` was defined).
        """
        if self.approx_type == 'hess':
            M = np.copy(self.B)
        else:
            M = np.copy(self.H)
        li = np.tril_indices_from(M, k=-1)
        M[li] = M.T[li]
        return M


class BFGS(FullHessianUpdateStrategy):
|
||||
"""Broyden-Fletcher-Goldfarb-Shanno (BFGS) Hessian update strategy.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
exception_strategy : {'skip_update', 'damp_update'}, optional
|
||||
Define how to proceed when the curvature condition is violated.
|
||||
Set it to 'skip_update' to just skip the update. Or, alternatively,
|
||||
set it to 'damp_update' to interpolate between the actual BFGS
|
||||
result and the unmodified matrix. Both exceptions strategies
|
||||
are explained in [1]_, p.536-537.
|
||||
min_curvature : float
|
||||
This number, scaled by a normalization factor, defines the
|
||||
minimum curvature ``dot(delta_grad, delta_x)`` allowed to go
|
||||
unaffected by the exception strategy. By default is equal to
|
||||
1e-8 when ``exception_strategy = 'skip_update'`` and equal
|
||||
to 0.2 when ``exception_strategy = 'damp_update'``.
|
||||
init_scale : {float, np.array, 'auto'}
|
||||
This parameter can be used to initialize the Hessian or its
|
||||
inverse. When a float is given, the relevant array is initialized
|
||||
to ``np.eye(n) * init_scale``, where ``n`` is the problem dimension.
|
||||
Alternatively, if a precisely ``(n, n)`` shaped, symmetric array is given,
|
||||
this array will be used. Otherwise an error is generated.
|
||||
Set it to 'auto' in order to use an automatic heuristic for choosing
|
||||
the initial scale. The heuristic is described in [1]_, p.143.
|
||||
The default is 'auto'.
|
||||
|
||||
Notes
|
||||
-----
|
||||
The update is based on the description in [1]_, p.140.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
|
||||
Second Edition (2006).
|
||||
"""
|
||||
|
||||
def __init__(self, exception_strategy='skip_update', min_curvature=None,
|
||||
init_scale='auto'):
|
||||
if exception_strategy == 'skip_update':
|
||||
if min_curvature is not None:
|
||||
self.min_curvature = min_curvature
|
||||
else:
|
||||
self.min_curvature = 1e-8
|
||||
elif exception_strategy == 'damp_update':
|
||||
if min_curvature is not None:
|
||||
self.min_curvature = min_curvature
|
||||
else:
|
||||
self.min_curvature = 0.2
|
||||
else:
|
||||
raise ValueError("`exception_strategy` must be 'skip_update' "
|
||||
"or 'damp_update'.")
|
||||
|
||||
super().__init__(init_scale)
|
||||
self.exception_strategy = exception_strategy
|
||||
|
||||
def _update_inverse_hessian(self, ys, Hy, yHy, s):
|
||||
"""Update the inverse Hessian matrix.
|
||||
|
||||
BFGS update using the formula:
|
||||
|
||||
``H <- H + ((H*y).T*y + s.T*y)/(s.T*y)^2 * (s*s.T)
|
||||
- 1/(s.T*y) * ((H*y)*s.T + s*(H*y).T)``
|
||||
|
||||
where ``s = delta_x`` and ``y = delta_grad``. This formula is
|
||||
equivalent to (6.17) in [1]_ written in a more efficient way
|
||||
for implementation.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
|
||||
Second Edition (2006).
|
||||
"""
|
||||
self.H = self._syr2(-1.0 / ys, s, Hy, a=self.H)
|
||||
self.H = self._syr((ys + yHy) / ys ** 2, s, a=self.H)
|
||||
|
||||
def _update_hessian(self, ys, Bs, sBs, y):
|
||||
"""Update the Hessian matrix.
|
||||
|
||||
BFGS update using the formula:
|
||||
|
||||
``B <- B - (B*s)*(B*s).T/s.T*(B*s) + y*y^T/s.T*y``
|
||||
|
||||
where ``s`` is short for ``delta_x`` and ``y`` is short
|
||||
for ``delta_grad``. Formula (6.19) in [1]_.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
|
||||
Second Edition (2006).
|
||||
"""
|
||||
self.B = self._syr(1.0 / ys, y, a=self.B)
|
||||
self.B = self._syr(-1.0 / sBs, Bs, a=self.B)
|
||||
|
||||
def _update_implementation(self, delta_x, delta_grad):
|
||||
# Auxiliary variables w and z
|
||||
if self.approx_type == 'hess':
|
||||
w = delta_x
|
||||
z = delta_grad
|
||||
else:
|
||||
w = delta_grad
|
||||
z = delta_x
|
||||
# Do some common operations
|
||||
wz = np.dot(w, z)
|
||||
Mw = self @ w
|
||||
wMw = Mw.dot(w)
|
||||
# Guarantee that wMw > 0 by reinitializing matrix.
|
||||
# While this is always true in exact arithmetic,
|
||||
# indefinite matrix may appear due to roundoff errors.
|
||||
if wMw <= 0.0:
|
||||
scale = self._auto_scale(delta_x, delta_grad)
|
||||
# Reinitialize matrix
|
||||
if self.approx_type == 'hess':
|
||||
self.B = scale * np.eye(self.n, dtype=float)
|
||||
else:
|
||||
self.H = scale * np.eye(self.n, dtype=float)
|
||||
# Do common operations for new matrix
|
||||
Mw = self @ w
|
||||
wMw = Mw.dot(w)
|
||||
# Check if curvature condition is violated
|
||||
if wz <= self.min_curvature * wMw:
|
||||
# If the option 'skip_update' is set
|
||||
# we just skip the update when the condition
|
||||
# is violated.
|
||||
if self.exception_strategy == 'skip_update':
|
||||
return
|
||||
# If the option 'damp_update' is set we
|
||||
# interpolate between the actual BFGS
|
||||
# result and the unmodified matrix.
|
||||
elif self.exception_strategy == 'damp_update':
|
||||
update_factor = (1-self.min_curvature) / (1 - wz/wMw)
|
||||
z = update_factor*z + (1-update_factor)*Mw
|
||||
wz = np.dot(w, z)
|
||||
# Update matrix
|
||||
if self.approx_type == 'hess':
|
||||
self._update_hessian(wz, Mw, wMw, z)
|
||||
else:
|
||||
self._update_inverse_hessian(wz, Mw, wMw, z)
|
||||
|
||||
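
# Illustrative sketch (assumes only the public scipy.optimize API): the
# strategies in this module are consumed through the `hess` argument of the
# 'trust-constr' method, which feeds (delta_x, delta_grad) pairs into
# update() after every accepted step. The objective below is a standard
# Rosenbrock-style test function, not part of the module.
import numpy as np
from scipy.optimize import minimize, BFGS

def _rosen(x):
    return (1 - x[0])**2 + 100*(x[1] - x[0]**2)**2

def _rosen_grad(x):
    return np.array([-2*(1 - x[0]) - 400*x[0]*(x[1] - x[0]**2),
                     200*(x[1] - x[0]**2)])

res = minimize(_rosen, np.array([-1.0, 2.0]), jac=_rosen_grad,
               hess=BFGS(exception_strategy='damp_update'),
               method='trust-constr')
print(res.x)  # expected to approach [1, 1]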


class SR1(FullHessianUpdateStrategy):
    """Symmetric-rank-1 Hessian update strategy.

    Parameters
    ----------
    min_denominator : float
        This number, scaled by a normalization factor,
        defines the minimum denominator magnitude allowed
        in the update. When the condition is violated we skip
        the update. The default is ``1e-8``.
    init_scale : {float, np.array, 'auto'}, optional
        This parameter can be used to initialize the Hessian or its
        inverse. When a float is given, the relevant array is initialized
        to ``np.eye(n) * init_scale``, where ``n`` is the problem dimension.
        Alternatively, if a precisely ``(n, n)`` shaped, symmetric array is
        given, this array will be used. Otherwise an error is generated.
        Set it to 'auto' in order to use an automatic heuristic for choosing
        the initial scale. The heuristic is described in [1]_, p.143.
        The default is 'auto'.

    Notes
    -----
    The update is based on the description in [1]_, p.144-146.

    References
    ----------
    .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
           Second Edition (2006).
    """

    def __init__(self, min_denominator=1e-8, init_scale='auto'):
        self.min_denominator = min_denominator
        super().__init__(init_scale)

    def _update_implementation(self, delta_x, delta_grad):
        # Auxiliary variables w and z
        if self.approx_type == 'hess':
            w = delta_x
            z = delta_grad
        else:
            w = delta_grad
            z = delta_x
        # Do some common operations
        Mw = self @ w
        z_minus_Mw = z - Mw
        denominator = np.dot(w, z_minus_Mw)
        # If the denominator is too small
        # we just skip the update.
        if np.abs(denominator) <= self.min_denominator*norm(w)*norm(z_minus_Mw):
            return
        # Update matrix
        if self.approx_type == 'hess':
            self.B = self._syr(1/denominator, z_minus_Mw, a=self.B)
        else:
            self.H = self._syr(1/denominator, z_minus_Mw, a=self.H)
@ -0,0 +1,338 @@
from warnings import warn

import numpy as np
import scipy.optimize._highspy._core as _h  # type: ignore[import-not-found]
from scipy.optimize._highspy import _highs_options as hopt  # type: ignore[attr-defined]
from scipy.optimize import OptimizeWarning


def _highs_wrapper(c, indptr, indices, data, lhs, rhs, lb, ub, integrality, options):
    '''Solve linear programs using HiGHS [1]_.

    Assume problems of the form:

    MIN c.T @ x
    s.t. lhs <= A @ x <= rhs
         lb <= x <= ub

    Parameters
    ----------
    c : 1-D array, (n,)
        Array of objective value coefficients.
    indptr : 1-D array
        CSC format index pointer array of the matrix A.
    indices : 1-D array
        CSC format index array of the matrix A.
    data : 1-D array
        Data array of the matrix A.
    lhs : 1-D array (or None), (m,)
        Array of left hand side values of the inequality constraints.
        If ``lhs=None``, then an array of ``-inf`` is assumed.
    rhs : 1-D array, (m,)
        Array of right hand side values of the inequality constraints.
    lb : 1-D array (or None), (n,)
        Lower bounds on solution variables x. If ``lb=None``, then an
        array of all ``0`` is assumed.
    ub : 1-D array (or None), (n,)
        Upper bounds on solution variables x. If ``ub=None``, then an
        array of ``inf`` is assumed.
    options : dict
        A dictionary of solver options.

    Returns
    -------
    res : dict

        If model_status is one of kOptimal, kObjectiveBound, kTimeLimit,
        kIterationLimit:

        - ``status`` : HighsModelStatus
            Model status code.

        - ``message`` : str
            Message corresponding to model status code.

        - ``x`` : list
            Solution variables.

        - ``slack`` : list
            Slack variables.

        - ``lambda`` : list
            Lagrange multipliers associated with the constraints
            Ax = b.

        - ``s`` : list
            Lagrange multipliers associated with the constraints
            x >= 0.

        - ``fun``
            Final objective value.

        - ``simplex_nit`` : int
            Number of iterations accomplished by the simplex
            solver.

        - ``ipm_nit`` : int
            Number of iterations accomplished by the interior-
            point solver.

        If model_status is not one of the above:

        - ``status`` : HighsModelStatus
            Model status code.

        - ``message`` : str
            Message corresponding to model status code.

    Notes
    -----
    If ``options['write_solution_to_file']`` is ``True`` but
    ``options['solution_file']`` is unset or ``''``, then the solution
    will be printed to ``stdout``.

    If any iteration limit is reached, no solution will be
    available.

    ``OptimizeWarning`` will be raised if any option value set by
    the user is found to be incorrect.

    References
    ----------
    .. [1] https://highs.dev/
    .. [2] https://www.maths.ed.ac.uk/hall/HiGHS/HighsOptions.html
    '''
    numcol = c.size
    numrow = rhs.size
    isMip = integrality is not None and np.sum(integrality) > 0

    # default "null" return values
    res = {
        "x": None,
        "fun": None,
    }

    # Fill up a HighsLp object
    lp = _h.HighsLp()
    lp.num_col_ = numcol
    lp.num_row_ = numrow
    lp.a_matrix_.num_col_ = numcol
    lp.a_matrix_.num_row_ = numrow
    lp.a_matrix_.format_ = _h.MatrixFormat.kColwise
    lp.col_cost_ = c
    lp.col_lower_ = lb
    lp.col_upper_ = ub
    lp.row_lower_ = lhs
    lp.row_upper_ = rhs
    lp.a_matrix_.start_ = indptr
    lp.a_matrix_.index_ = indices
    lp.a_matrix_.value_ = data
    if integrality.size > 0:
        lp.integrality_ = [_h.HighsVarType(i) for i in integrality]

    # Make a Highs object and pass it everything
    highs = _h._Highs()
    highs_options = _h.HighsOptions()
    hoptmanager = hopt.HighsOptionsManager()
    for key, val in options.items():
        # handle filtering of unsupported and default options
        if val is None or key in ("sense",):
            continue

        # ask for the option type
        opt_type = hoptmanager.get_option_type(key)
        if -1 == opt_type:
            warn(
                f"Unrecognized options detected: {dict({key: val})}",
                OptimizeWarning,
                stacklevel=2,
            )
            continue
        else:
            if key in ("presolve", "parallel"):
                # handle fake bools (require bool -> str conversions)
                if isinstance(val, bool):
                    val = "on" if val else "off"
                else:
                    warn(
                        f'Option "{key}" is "{val}", but only True or False is '
                        f"allowed. Using default.",
                        OptimizeWarning,
                        stacklevel=2,
                    )
                    continue
            opt_type = _h.HighsOptionType(opt_type)
            status, msg = check_option(highs, key, val)
            if opt_type == _h.HighsOptionType.kBool:
                if not isinstance(val, bool):
                    warn(
                        f'Option "{key}" is "{val}", but only True or False is '
                        f"allowed. Using default.",
                        OptimizeWarning,
                        stacklevel=2,
                    )
                    continue

            # warn or set option
            if status != 0:
                warn(msg, OptimizeWarning, stacklevel=2)
            else:
                setattr(highs_options, key, val)

    opt_status = highs.passOptions(highs_options)
    if opt_status == _h.HighsStatus.kError:
        res.update(
            {
                "status": highs.getModelStatus(),
                "message": highs.modelStatusToString(highs.getModelStatus()),
            }
        )
        return res

    init_status = highs.passModel(lp)
    if init_status == _h.HighsStatus.kError:
        # if model fails to load, highs.getModelStatus() will be NOT_SET
        err_model_status = _h.HighsModelStatus.kModelError
        res.update(
            {
                "status": err_model_status,
                "message": highs.modelStatusToString(err_model_status),
            }
        )
        return res

    # Solve the LP
    run_status = highs.run()
    if run_status == _h.HighsStatus.kError:
        res.update(
            {
                "status": highs.getModelStatus(),
                "message": highs.modelStatusToString(highs.getModelStatus()),
            }
        )
        return res

    # Extract what we need from the solution
    model_status = highs.getModelStatus()

    # it should always be safe to get the info object
    info = highs.getInfo()

    # Failure modes:
    #     LP: if we have anything other than an Optimal status, it
    #         is unsafe (and unhelpful) to read any results
    #     MIP: has a non-Optimal status or has timed out/reached max iterations
    #         1) If not Optimal/TimedOut/MaxIter status, there is no solution
    #         2) If TimedOut/MaxIter status, there may be a feasible solution.
    #            If the objective function value is not Infinity, then the
    #            current solution is feasible and can be returned. Else, there
    #            is no solution.
    mipFailCondition = model_status not in (
        _h.HighsModelStatus.kOptimal,
        _h.HighsModelStatus.kTimeLimit,
        _h.HighsModelStatus.kIterationLimit,
        _h.HighsModelStatus.kSolutionLimit,
    ) or (
        model_status
        in {
            _h.HighsModelStatus.kTimeLimit,
            _h.HighsModelStatus.kIterationLimit,
            _h.HighsModelStatus.kSolutionLimit,
        }
        and (info.objective_function_value == _h.kHighsInf)
    )
    lpFailCondition = model_status != _h.HighsModelStatus.kOptimal
    if (isMip and mipFailCondition) or (not isMip and lpFailCondition):
        res.update(
            {
                "status": model_status,
                "message": "model_status is "
                f"{highs.modelStatusToString(model_status)}; "
                "primal_status is "
                f"{highs.solutionStatusToString(info.primal_solution_status)}",
                "simplex_nit": info.simplex_iteration_count,
                "ipm_nit": info.ipm_iteration_count,
                "crossover_nit": info.crossover_iteration_count,
            }
        )
        return res

    # Should be safe to read the solution:
    solution = highs.getSolution()
    basis = highs.getBasis()

    # Lagrangians for bounds based on column statuses
    marg_bnds = np.zeros((2, numcol))
    basis_col_status = basis.col_status
    solution_col_dual = solution.col_dual
    for ii in range(numcol):
        if basis_col_status[ii] == _h.HighsBasisStatus.kLower:
            marg_bnds[0, ii] = solution_col_dual[ii]
        elif basis_col_status[ii] == _h.HighsBasisStatus.kUpper:
            marg_bnds[1, ii] = solution_col_dual[ii]

    res.update(
        {
            "status": model_status,
            "message": highs.modelStatusToString(model_status),
            # Primal solution
            "x": np.array(solution.col_value),
            # Ax + s = b => Ax = b - s
            # Note: this is for all constraints (A_ub and A_eq)
            "slack": rhs - solution.row_value,
            # lambda are the lagrange multipliers associated with Ax=b
            "lambda": np.array(solution.row_dual),
            "marg_bnds": marg_bnds,
            "fun": info.objective_function_value,
            "simplex_nit": info.simplex_iteration_count,
            "ipm_nit": info.ipm_iteration_count,
            "crossover_nit": info.crossover_iteration_count,
        }
    )

    if isMip:
        res.update(
            {
                "mip_node_count": info.mip_node_count,
                "mip_dual_bound": info.mip_dual_bound,
                "mip_gap": info.mip_gap,
            }
        )

    return res


def check_option(highs_inst, option, value):
    status, option_type = highs_inst.getOptionType(option)
    hoptmanager = hopt.HighsOptionsManager()

    if status != _h.HighsStatus.kOk:
        return -1, "Invalid option name."

    valid_types = {
        _h.HighsOptionType.kBool: bool,
        _h.HighsOptionType.kInt: int,
        _h.HighsOptionType.kDouble: float,
        _h.HighsOptionType.kString: str,
    }

    expected_type = valid_types.get(option_type, None)

    if expected_type is str:
        if not hoptmanager.check_string_option(option, value):
            return -1, "Invalid option value."
    if expected_type is float:
        if not hoptmanager.check_double_option(option, value):
            return -1, "Invalid option value."
    if expected_type is int:
        if not hoptmanager.check_int_option(option, value):
            return -1, "Invalid option value."

    if expected_type is None:
        return 3, "Unknown option type."

    status, current_value = highs_inst.getOptionValue(option)
    if status != _h.HighsStatus.kOk:
        return 4, "Failed to validate option value."
    return 0, "Check option succeeded."
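
# Illustrative sketch: the supported public route into this wrapper is
# scipy.optimize.linprog with method='highs'; linprog builds the CSC arrays
# and option dict that _highs_wrapper expects.
import numpy as np
from scipy.optimize import linprog

c = np.array([-1.0, -2.0])            # minimize -x1 - 2*x2
A_ub = np.array([[1.0, 1.0],
                 [1.0, 3.0]])
b_ub = np.array([4.0, 6.0])           # x1 + x2 <= 4, x1 + 3*x2 <= 6, x >= 0

res = linprog(c, A_ub=A_ub, b_ub=b_ub, method="highs")
print(res.status, res.fun, res.x)     # 0 -5.0 [3. 1.]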
157
venv/lib/python3.13/site-packages/scipy/optimize/_isotonic.py
Normal file
@ -0,0 +1,157 @@
from typing import TYPE_CHECKING

import numpy as np

from ._optimize import OptimizeResult
from ._pava_pybind import pava

if TYPE_CHECKING:
    import numpy.typing as npt


__all__ = ["isotonic_regression"]


def isotonic_regression(
    y: "npt.ArrayLike",
    *,
    weights: "npt.ArrayLike | None" = None,
    increasing: bool = True,
) -> OptimizeResult:
    r"""Nonparametric isotonic regression.

    A (not strictly) monotonically increasing array `x` with the same length
    as `y` is calculated by the pool adjacent violators algorithm (PAVA), see
    [1]_. See the Notes section for more details.

    Parameters
    ----------
    y : (N,) array_like
        Response variable.
    weights : (N,) array_like or None
        Case weights.
    increasing : bool
        If True, fit monotonic increasing, i.e. isotonic, regression.
        If False, fit a monotonic decreasing, i.e. antitonic, regression.
        Default is True.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as an ``OptimizeResult`` object.
        Important attributes are:

        - ``x``: The isotonic regression solution, i.e. an increasing (or
          decreasing) array of the same length as y, with elements in the
          range from min(y) to max(y).
        - ``weights``: Array with the sum of case weights for each block
          (or pool) B.
        - ``blocks``: Array of length B+1 with the indices of the start
          positions of each block (or pool) B. The j-th block is given by
          ``x[blocks[j]:blocks[j+1]]`` for which all values are the same.

    Notes
    -----
    Given data :math:`y` and case weights :math:`w`, the isotonic regression
    solves the following optimization problem:

    .. math::

        \operatorname{argmin}_{x_i} \sum_i w_i (y_i - x_i)^2 \quad
        \text{subject to } x_i \leq x_j \text{ whenever } i \leq j \,.

    For every input value :math:`y_i`, it generates a value :math:`x_i` such
    that :math:`x` is increasing (but not strictly), i.e.
    :math:`x_i \leq x_{i+1}`. This is accomplished by the PAVA.
    The solution consists of pools or blocks, i.e. neighboring elements of
    :math:`x`, e.g. :math:`x_i` and :math:`x_{i+1}`, that all have the same
    value.

    Most interestingly, the solution stays the same if the squared loss is
    replaced by the wide class of Bregman functions which are the unique
    class of strictly consistent scoring functions for the mean, see [2]_
    and references therein.

    The implemented version of PAVA according to [1]_ has a computational
    complexity of O(N) with input size N.

    References
    ----------
    .. [1] Busing, F. M. T. A. (2022).
           Monotone Regression: A Simple and Fast O(n) PAVA Implementation.
           Journal of Statistical Software, Code Snippets, 102(1), 1-25.
           :doi:`10.18637/jss.v102.c01`
    .. [2] Jordan, A.I., Mühlemann, A. & Ziegel, J.F.
           Characterizing the optimal solutions to the isotonic regression
           problem for identifiable functionals.
           Ann Inst Stat Math 74, 489-514 (2022).
           :doi:`10.1007/s10463-021-00808-0`

    Examples
    --------
    This example demonstrates that ``isotonic_regression`` really solves a
    constrained optimization problem.

    >>> import numpy as np
    >>> from scipy.optimize import isotonic_regression, minimize
    >>> y = [1.5, 1.0, 4.0, 6.0, 5.7, 5.0, 7.8, 9.0, 7.5, 9.5, 9.0]
    >>> def objective(yhat, y):
    ...     return np.sum((yhat - y)**2)
    >>> def constraint(yhat, y):
    ...     # This is for a monotonically increasing regression.
    ...     return np.diff(yhat)
    >>> result = minimize(objective, x0=y, args=(y,),
    ...                   constraints=[{'type': 'ineq',
    ...                                 'fun': lambda x: constraint(x, y)}])
    >>> result.x
    array([1.25      , 1.25      , 4.        , 5.56666667, 5.56666667,
           5.56666667, 7.8       , 8.25      , 8.25      , 9.25      ,
           9.25      ])
    >>> result = isotonic_regression(y)
    >>> result.x
    array([1.25      , 1.25      , 4.        , 5.56666667, 5.56666667,
           5.56666667, 7.8       , 8.25      , 8.25      , 9.25      ,
           9.25      ])

    The big advantage of ``isotonic_regression`` compared to calling
    ``minimize`` is that it is more user friendly, i.e. one does not need to
    define objective and constraint functions, and that it is orders of
    magnitude faster. On commodity hardware (in 2023), for normally
    distributed input y of length 1000, the minimizer takes about 4 seconds,
    while ``isotonic_regression`` takes about 200 microseconds.
    """
    yarr = np.atleast_1d(y)  # Check yarr.ndim == 1 is implicit (pybind11) in pava.
    order = slice(None) if increasing else slice(None, None, -1)
    x = np.array(yarr[order], order="C", dtype=np.float64, copy=True)
    if weights is None:
        wx = np.ones_like(yarr, dtype=np.float64)
    else:
        warr = np.atleast_1d(weights)

        if not (yarr.ndim == warr.ndim == 1 and yarr.shape[0] == warr.shape[0]):
            raise ValueError(
                "Input arrays y and w must have one dimension of equal length."
            )
        if np.any(warr <= 0):
            raise ValueError("Weights w must be strictly positive.")

        wx = np.array(warr[order], order="C", dtype=np.float64, copy=True)
    n = x.shape[0]
    r = np.full(shape=n + 1, fill_value=-1, dtype=np.intp)
    x, wx, r, b = pava(x, wx, r)
    # Now that we know the number of blocks b, we only keep the relevant part
    # of r and wx.
    # As information: Due to the pava implementation, after the last block
    # index, there might be smaller numbers appended to r, e.g.
    # r = [0, 10, 8, 7] which in the end should be r = [0, 10].
    r = r[:b + 1]  # type: ignore[assignment]
    wx = wx[:b]
    if not increasing:
        x = x[::-1]
        wx = wx[::-1]
        r = r[-1] - r[::-1]
    return OptimizeResult(
        x=x,
        weights=wx,
        blocks=r,
    )
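
# Illustrative sketch of the block bookkeeping documented above, using only
# the public isotonic_regression API and its returned fields.
import numpy as np
from scipy.optimize import isotonic_regression

y = np.array([2.0, 1.0, 3.0, 5.0, 4.0])
w = np.array([1.0, 1.0, 1.0, 1.0, 3.0])

res = isotonic_regression(y, weights=w)
print(res.x)        # pooled, non-decreasing fit: [1.5, 1.5, 3., 4.25, 4.25]
print(res.blocks)   # [0, 2, 3, 5]; block j spans x[blocks[j]:blocks[j+1]]
print(res.weights)  # [2., 1., 4.]; summed case weights of each pool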
634
venv/lib/python3.13/site-packages/scipy/optimize/_lbfgsb_py.py
Normal file
@ -0,0 +1,634 @@
"""
|
||||
Functions
|
||||
---------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
fmin_l_bfgs_b
|
||||
|
||||
"""
|
||||
|
||||
## License for the Python wrapper
|
||||
## ==============================
|
||||
|
||||
## Copyright (c) 2004 David M. Cooke <cookedm@physics.mcmaster.ca>
|
||||
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a
|
||||
## copy of this software and associated documentation files (the "Software"),
|
||||
## to deal in the Software without restriction, including without limitation
|
||||
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
## and/or sell copies of the Software, and to permit persons to whom the
|
||||
## Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
## The above copyright notice and this permission notice shall be included in
|
||||
## all copies or substantial portions of the Software.
|
||||
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
## DEALINGS IN THE SOFTWARE.
|
||||
|
||||
## Modifications by Travis Oliphant and Enthought, Inc. for inclusion in SciPy
|
||||
|
||||
import numpy as np
|
||||
from numpy import array, asarray, float64, zeros
|
||||
from . import _lbfgsb
|
||||
from ._optimize import (MemoizeJac, OptimizeResult, _call_callback_maybe_halt,
|
||||
_wrap_callback, _check_unknown_options,
|
||||
_prepare_scalar_function)
|
||||
from ._constraints import old_bound_to_new
|
||||
|
||||
from scipy.sparse.linalg import LinearOperator
|
||||
from scipy._lib.deprecation import _NoValue
|
||||
import warnings
|
||||
|
||||
__all__ = ['fmin_l_bfgs_b', 'LbfgsInvHessProduct']
|
||||
|
||||
|
||||
status_messages = {
|
||||
0 : "START",
|
||||
1 : "NEW_X",
|
||||
2 : "RESTART",
|
||||
3 : "FG",
|
||||
4 : "CONVERGENCE",
|
||||
5 : "STOP",
|
||||
6 : "WARNING",
|
||||
7 : "ERROR",
|
||||
8 : "ABNORMAL"
|
||||
}
|
||||
|
||||
|
||||
task_messages = {
|
||||
0 : "",
|
||||
301 : "",
|
||||
302 : "",
|
||||
401 : "NORM OF PROJECTED GRADIENT <= PGTOL",
|
||||
402 : "RELATIVE REDUCTION OF F <= FACTR*EPSMCH",
|
||||
501 : "CPU EXCEEDING THE TIME LIMIT",
|
||||
502 : "TOTAL NO. OF F,G EVALUATIONS EXCEEDS LIMIT",
|
||||
503 : "PROJECTED GRADIENT IS SUFFICIENTLY SMALL",
|
||||
504 : "TOTAL NO. OF ITERATIONS REACHED LIMIT",
|
||||
505 : "CALLBACK REQUESTED HALT",
|
||||
601 : "ROUNDING ERRORS PREVENT PROGRESS",
|
||||
602 : "STP = STPMAX",
|
||||
603 : "STP = STPMIN",
|
||||
604 : "XTOL TEST SATISFIED",
|
||||
701 : "NO FEASIBLE SOLUTION",
|
||||
702 : "FACTR < 0",
|
||||
703 : "FTOL < 0",
|
||||
704 : "GTOL < 0",
|
||||
705 : "XTOL < 0",
|
||||
706 : "STP < STPMIN",
|
||||
707 : "STP > STPMAX",
|
||||
708 : "STPMIN < 0",
|
||||
709 : "STPMAX < STPMIN",
|
||||
710 : "INITIAL G >= 0",
|
||||
711 : "M <= 0",
|
||||
712 : "N <= 0",
|
||||
713 : "INVALID NBD",
|
||||
}
|
||||
|
||||
def fmin_l_bfgs_b(func, x0, fprime=None, args=(),
|
||||
approx_grad=0,
|
||||
bounds=None, m=10, factr=1e7, pgtol=1e-5,
|
||||
epsilon=1e-8,
|
||||
iprint=_NoValue, maxfun=15000, maxiter=15000, disp=_NoValue,
|
||||
callback=None, maxls=20):
|
||||
"""
|
||||
Minimize a function func using the L-BFGS-B algorithm.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
func : callable f(x,*args)
|
||||
Function to minimize.
|
||||
x0 : ndarray
|
||||
Initial guess.
|
||||
fprime : callable fprime(x,*args), optional
|
||||
The gradient of `func`. If None, then `func` returns the function
|
||||
value and the gradient (``f, g = func(x, *args)``), unless
|
||||
`approx_grad` is True in which case `func` returns only ``f``.
|
||||
args : sequence, optional
|
||||
Arguments to pass to `func` and `fprime`.
|
||||
approx_grad : bool, optional
|
||||
Whether to approximate the gradient numerically (in which case
|
||||
`func` returns only the function value).
|
||||
bounds : list, optional
|
||||
``(min, max)`` pairs for each element in ``x``, defining
|
||||
the bounds on that parameter. Use None or +-inf for one of ``min`` or
|
||||
``max`` when there is no bound in that direction.
|
||||
m : int, optional
|
||||
The maximum number of variable metric corrections
|
||||
used to define the limited memory matrix. (The limited memory BFGS
|
||||
method does not store the full hessian but uses this many terms in an
|
||||
approximation to it.)
|
||||
factr : float, optional
|
||||
The iteration stops when
|
||||
``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``,
|
||||
where ``eps`` is the machine precision, which is automatically
|
||||
generated by the code. Typical values for `factr` are: 1e12 for
|
||||
low accuracy; 1e7 for moderate accuracy; 10.0 for extremely
|
||||
high accuracy. See Notes for relationship to `ftol`, which is exposed
|
||||
(instead of `factr`) by the `scipy.optimize.minimize` interface to
|
||||
L-BFGS-B.
|
||||
pgtol : float, optional
|
||||
The iteration will stop when
|
||||
``max{|proj g_i | i = 1, ..., n} <= pgtol``
|
||||
where ``proj g_i`` is the i-th component of the projected gradient.
|
||||
epsilon : float, optional
|
||||
Step size used when `approx_grad` is True, for numerically
|
||||
calculating the gradient
|
||||
iprint : int, optional
|
||||
Deprecated option that previously controlled the text printed on the
|
||||
screen during the problem solution. Now the code does not emit any
|
||||
output and this keyword has no function.
|
||||
|
||||
.. deprecated:: 1.15.0
|
||||
This keyword is deprecated and will be removed from SciPy 1.18.0.
|
||||
|
||||
disp : int, optional
|
||||
Deprecated option that previously controlled the text printed on the
|
||||
screen during the problem solution. Now the code does not emit any
|
||||
output and this keyword has no function.
|
||||
|
||||
.. deprecated:: 1.15.0
|
||||
This keyword is deprecated and will be removed from SciPy 1.18.0.
|
||||
|
||||
maxfun : int, optional
|
||||
Maximum number of function evaluations. Note that this function
|
||||
may violate the limit because of evaluating gradients by numerical
|
||||
differentiation.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations.
|
||||
callback : callable, optional
|
||||
Called after each iteration, as ``callback(xk)``, where ``xk`` is the
|
||||
current parameter vector.
|
||||
maxls : int, optional
|
||||
Maximum number of line search steps (per iteration). Default is 20.
|
||||
|
||||
Returns
|
||||
-------
|
||||
x : array_like
|
||||
Estimated position of the minimum.
|
||||
f : float
|
||||
Value of `func` at the minimum.
|
||||
d : dict
|
||||
Information dictionary.
|
||||
|
||||
* d['warnflag'] is
|
||||
|
||||
- 0 if converged,
|
||||
- 1 if too many function evaluations or too many iterations,
|
||||
- 2 if stopped for another reason, given in d['task']
|
||||
|
||||
* d['grad'] is the gradient at the minimum (should be 0 ish)
|
||||
* d['funcalls'] is the number of function calls made.
|
||||
* d['nit'] is the number of iterations.
|
||||
|
||||
See also
|
||||
--------
|
||||
minimize: Interface to minimization algorithms for multivariate
|
||||
functions. See the 'L-BFGS-B' `method` in particular. Note that the
|
||||
`ftol` option is made available via that interface, while `factr` is
|
||||
provided via this interface, where `factr` is the factor multiplying
|
||||
the default machine floating-point precision to arrive at `ftol`:
|
||||
``ftol = factr * numpy.finfo(float).eps``.
|
||||
|
||||
Notes
|
||||
-----
|
||||
SciPy uses a C-translated and modified version of the Fortran code,
|
||||
L-BFGS-B v3.0 (released April 25, 2011, BSD-3 licensed). Original Fortran
|
||||
version was written by Ciyou Zhu, Richard Byrd, Jorge Nocedal and,
|
||||
Jose Luis Morales.
|
||||
|
||||
References
|
||||
----------
|
||||
* R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
|
||||
Constrained Optimization, (1995), SIAM Journal on Scientific and
|
||||
Statistical Computing, 16, 5, pp. 1190-1208.
|
||||
* C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
|
||||
FORTRAN routines for large scale bound constrained optimization (1997),
|
||||
ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
|
||||
* J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
|
||||
FORTRAN routines for large scale bound constrained optimization (2011),
|
||||
ACM Transactions on Mathematical Software, 38, 1.
|
||||
|
||||
Examples
|
||||
--------
|
||||
Solve a linear regression problem via `fmin_l_bfgs_b`. To do this, first we
|
||||
define an objective function ``f(m, b) = (y - y_model)**2``, where `y`
|
||||
describes the observations and `y_model` the prediction of the linear model
|
||||
as ``y_model = m*x + b``. The bounds for the parameters, ``m`` and ``b``,
|
||||
are arbitrarily chosen as ``(0,5)`` and ``(5,10)`` for this example.
|
||||
|
||||
>>> import numpy as np
|
||||
>>> from scipy.optimize import fmin_l_bfgs_b
|
||||
>>> X = np.arange(0, 10, 1)
|
||||
>>> M = 2
|
||||
>>> B = 3
|
||||
>>> Y = M * X + B
|
||||
>>> def func(parameters, *args):
|
||||
... x = args[0]
|
||||
... y = args[1]
|
||||
... m, b = parameters
|
||||
... y_model = m*x + b
|
||||
... error = sum(np.power((y - y_model), 2))
|
||||
... return error
|
||||
|
||||
>>> initial_values = np.array([0.0, 1.0])
|
||||
|
||||
>>> x_opt, f_opt, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y),
|
||||
... approx_grad=True)
|
||||
>>> x_opt, f_opt
|
||||
array([1.99999999, 3.00000006]), 1.7746231151323805e-14 # may vary
|
||||
|
||||
The optimized parameters in ``x_opt`` agree with the ground truth parameters
|
||||
``m`` and ``b``. Next, let us perform a bound constrained optimization using
|
||||
the `bounds` parameter.
|
||||
|
||||
>>> bounds = [(0, 5), (5, 10)]
|
||||
>>> x_opt, f_op, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y),
|
||||
... approx_grad=True, bounds=bounds)
|
||||
>>> x_opt, f_opt
|
||||
array([1.65990508, 5.31649385]), 15.721334516453945 # may vary
|
||||
"""
|
||||
# handle fprime/approx_grad
|
||||
if approx_grad:
|
||||
fun = func
|
||||
jac = None
|
||||
elif fprime is None:
|
||||
fun = MemoizeJac(func)
|
||||
jac = fun.derivative
|
||||
else:
|
||||
fun = func
|
||||
jac = fprime
|
||||
|
||||
# build options
|
||||
callback = _wrap_callback(callback)
|
||||
opts = {'disp': disp,
|
||||
'iprint': iprint,
|
||||
'maxcor': m,
|
||||
'ftol': factr * np.finfo(float).eps,
|
||||
'gtol': pgtol,
|
||||
'eps': epsilon,
|
||||
'maxfun': maxfun,
|
||||
'maxiter': maxiter,
|
||||
'callback': callback,
|
||||
'maxls': maxls}
|
||||
|
||||
res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
|
||||
**opts)
|
||||
d = {'grad': res['jac'],
|
||||
'task': res['message'],
|
||||
'funcalls': res['nfev'],
|
||||
'nit': res['nit'],
|
||||
'warnflag': res['status']}
|
||||
f = res['fun']
|
||||
x = res['x']
|
||||
|
||||
return x, f, d
|
||||
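
# Quick numeric check of the factr -> ftol conversion performed in `opts`
# above (nothing here beyond the numpy API):
import numpy as np

factr = 1e7  # the default, "moderate accuracy"
print(factr * np.finfo(float).eps)  # ~2.22e-09, the _minimize_lbfgsb default below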


def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None,
                     disp=_NoValue, maxcor=10, ftol=2.2204460492503131e-09,
                     gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000,
                     iprint=_NoValue, callback=None, maxls=20,
                     finite_diff_rel_step=None, workers=None,
                     **unknown_options):
    """
    Minimize a scalar function of one or more variables using the L-BFGS-B
    algorithm.

    Options
    -------
    disp : None or int
        Deprecated option that previously controlled the text printed on the
        screen during the problem solution. Now the code does not emit any
        output and this keyword has no function.

        .. deprecated:: 1.15.0
            This keyword is deprecated and will be removed from SciPy 1.18.0.

    maxcor : int
        The maximum number of variable metric corrections used to
        define the limited memory matrix. (The limited memory BFGS
        method does not store the full hessian but uses this many terms
        in an approximation to it.)
    ftol : float
        The iteration stops when ``(f^k -
        f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``.
    gtol : float
        The iteration will stop when ``max{|proj g_i | i = 1, ..., n}
        <= gtol`` where ``proj g_i`` is the i-th component of the
        projected gradient.
    eps : float or ndarray
        If `jac is None` the absolute step size used for numerical
        approximation of the jacobian via forward differences.
    maxfun : int
        Maximum number of function evaluations before minimization terminates.
        Note that this function may violate the limit if the gradients
        are evaluated by numerical differentiation.
    maxiter : int
        Maximum number of algorithm iterations.
    iprint : int, optional
        Deprecated option that previously controlled the text printed on the
        screen during the problem solution. Now the code does not emit any
        output and this keyword has no function.

        .. deprecated:: 1.15.0
            This keyword is deprecated and will be removed from SciPy 1.18.0.

    maxls : int, optional
        Maximum number of line search steps (per iteration). Default is 20.
    finite_diff_rel_step : None or array_like, optional
        If ``jac in ['2-point', '3-point', 'cs']`` the relative step size to
        use for numerical approximation of the jacobian. The absolute step
        size is computed as ``h = rel_step * sign(x) * max(1, abs(x))``,
        possibly adjusted to fit into the bounds. For ``method='3-point'``
        the sign of `h` is ignored. If None (default) then step is selected
        automatically.
    workers : int, map-like callable, optional
        A map-like callable, such as `multiprocessing.Pool.map` for evaluating
        any numerical differentiation in parallel.
        This evaluation is carried out as ``workers(fun, iterable)``.

        .. versionadded:: 1.16.0

    Notes
    -----
    The option `ftol` is exposed via the `scipy.optimize.minimize` interface,
    but calling `scipy.optimize.fmin_l_bfgs_b` directly exposes `factr`. The
    relationship between the two is ``ftol = factr * numpy.finfo(float).eps``.
    I.e., `factr` multiplies the default machine floating-point precision to
    arrive at `ftol`.
    If the minimization is slow to converge the optimizer may halt if the
    total number of function evaluations exceeds `maxfun`, or the number of
    algorithm iterations has reached `maxiter` (whichever comes first). If
    this is the case then ``result.success=False``, and an appropriate
    error message is contained in ``result.message``.

    """
    _check_unknown_options(unknown_options)
    m = maxcor
    pgtol = gtol
    factr = ftol / np.finfo(float).eps

    x0 = asarray(x0).ravel()
    n, = x0.shape
    if disp is not _NoValue:
        warnings.warn("scipy.optimize: The `disp` and `iprint` options of the "
                      "L-BFGS-B solver are deprecated and will be removed in "
                      "SciPy 1.18.0.",
                      DeprecationWarning, stacklevel=3)

    if iprint is not _NoValue:
        warnings.warn("scipy.optimize: The `disp` and `iprint` options of the "
                      "L-BFGS-B solver are deprecated and will be removed in "
                      "SciPy 1.18.0.",
                      DeprecationWarning, stacklevel=3)

    # historically old-style bounds were/are expected by lbfgsb.
    # That's still the case but we'll deal with new-style from here on,
    # it's easier
    if bounds is None:
        pass
    elif len(bounds) != n:
        raise ValueError('length of x0 != length of bounds')
    else:
        bounds = np.array(old_bound_to_new(bounds))

        # check bounds
        if (bounds[0] > bounds[1]).any():
            raise ValueError(
                "LBFGSB - one of the lower bounds is greater than an upper bound."
            )

        # initial vector must lie within the bounds. Otherwise ScalarFunction
        # and approx_derivative will cause problems
        x0 = np.clip(x0, bounds[0], bounds[1])

    # _prepare_scalar_function can use bounds=None to represent no bounds
    sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
                                  bounds=bounds,
                                  finite_diff_rel_step=finite_diff_rel_step,
                                  workers=workers)

    func_and_grad = sf.fun_and_grad

    nbd = zeros(n, np.int32)
    low_bnd = zeros(n, float64)
    upper_bnd = zeros(n, float64)
    bounds_map = {(-np.inf, np.inf): 0,
                  (1, np.inf): 1,
                  (1, 1): 2,
                  (-np.inf, 1): 3}

    if bounds is not None:
        for i in range(0, n):
            L, U = bounds[0, i], bounds[1, i]
            if not np.isinf(L):
                low_bnd[i] = L
                L = 1
            if not np.isinf(U):
                upper_bnd[i] = U
                U = 1
            nbd[i] = bounds_map[L, U]

    if not maxls > 0:
        raise ValueError('maxls must be positive.')

    x = array(x0, dtype=np.float64)
    f = array(0.0, dtype=np.float64)
    g = zeros((n,), dtype=np.float64)
    wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64)
    iwa = zeros(3*n, dtype=np.int32)
    task = zeros(2, dtype=np.int32)
    ln_task = zeros(2, dtype=np.int32)
    lsave = zeros(4, dtype=np.int32)
    isave = zeros(44, dtype=np.int32)
    dsave = zeros(29, dtype=float64)

    n_iterations = 0

    while True:
        # g may become float32 if a user provides a function that calculates
        # the Jacobian in float32 (see gh-18730). The underlying code expects
        # float64, so upcast it
        g = g.astype(np.float64)
        # x, f, g, wa, iwa, task, csave, lsave, isave, dsave = \
        _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr, pgtol, wa,
                       iwa, task, lsave, isave, dsave, maxls, ln_task)

        if task[0] == 3:
            # The minimization routine wants f and g at the current x.
            # Note that interruptions due to maxfun are postponed
            # until the completion of the current minimization iteration.
            # Overwrite f and g:
            f, g = func_and_grad(x)
        elif task[0] == 1:
            # new iteration
            n_iterations += 1

            intermediate_result = OptimizeResult(x=x, fun=f)
            if _call_callback_maybe_halt(callback, intermediate_result):
                task[0] = 5
                task[1] = 505
            if n_iterations >= maxiter:
                task[0] = 5
                task[1] = 504
            elif sf.nfev > maxfun:
                task[0] = 5
                task[1] = 502
        else:
            break

    if task[0] == 4:
        warnflag = 0
    elif sf.nfev > maxfun or n_iterations >= maxiter:
        warnflag = 1
    else:
        warnflag = 2

    # These two portions of the workspace are described in the mainlb
    # function docstring in "__lbfgsb.c", ws and wy arguments.
    s = wa[0: m*n].reshape(m, n)
    y = wa[m*n: 2*m*n].reshape(m, n)

    # isave(31) = the total number of BFGS updates prior the current iteration.
    n_bfgs_updates = isave[30]

    n_corrs = min(n_bfgs_updates, maxcor)
    hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs])

    msg = status_messages[task[0]] + ": " + task_messages[task[1]]

    return OptimizeResult(fun=f, jac=g, nfev=sf.nfev,
                          njev=sf.ngev,
                          nit=n_iterations, status=warnflag, message=msg,
                          x=x, success=(warnflag == 0), hess_inv=hess_inv)
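
# Illustrative sketch: these options are normally reached through the public
# minimize interface, which forwards the `options` dict to _minimize_lbfgsb.
import numpy as np
from scipy.optimize import minimize

def _quad(x):
    return np.sum((x - 1.0)**2)

res = minimize(_quad, np.zeros(5), method="L-BFGS-B",
               bounds=[(0.0, 0.5)] * 5,
               options={"maxcor": 5, "ftol": 1e-12, "gtol": 1e-10})
print(res.x)        # pinned at the upper bounds: [0.5 0.5 0.5 0.5 0.5]
print(res.message)  # assembled from status_messages/task_messages above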
class LbfgsInvHessProduct(LinearOperator):
    """Linear operator for the L-BFGS approximate inverse Hessian.

    This operator computes the product of a vector with the approximate inverse
    of the Hessian of the objective function, using the L-BFGS limited
    memory approximation to the inverse Hessian, accumulated during the
    optimization.

    Objects of this class implement the ``scipy.sparse.linalg.LinearOperator``
    interface.

    Parameters
    ----------
    sk : array_like, shape=(n_corr, n)
        Array of `n_corr` most recent updates to the solution vector.
        (See [1]).
    yk : array_like, shape=(n_corr, n)
        Array of `n_corr` most recent updates to the gradient. (See [1]).

    References
    ----------
    .. [1] Nocedal, Jorge. "Updating quasi-Newton matrices with limited
       storage." Mathematics of computation 35.151 (1980): 773-782.

    """

    def __init__(self, sk, yk):
        """Construct the operator."""
        if sk.shape != yk.shape or sk.ndim != 2:
            raise ValueError('sk and yk must have matching shape, (n_corrs, n)')
        n_corrs, n = sk.shape

        super().__init__(dtype=np.float64, shape=(n, n))

        self.sk = sk
        self.yk = yk
        self.n_corrs = n_corrs
        self.rho = 1 / np.einsum('ij,ij->i', sk, yk)

    def _matvec(self, x):
        """Efficient matrix-vector multiply with the BFGS matrices.

        This calculation is described in Section (4) of [1].

        Parameters
        ----------
        x : ndarray
            An array with shape (n,) or (n,1).

        Returns
        -------
        y : ndarray
            The matrix-vector product.

        """
        s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
        q = np.array(x, dtype=self.dtype, copy=True)
        if q.ndim == 2 and q.shape[1] == 1:
            q = q.reshape(-1)

        alpha = np.empty(n_corrs)

        for i in range(n_corrs-1, -1, -1):
            alpha[i] = rho[i] * np.dot(s[i], q)
            q = q - alpha[i]*y[i]

        r = q
        for i in range(n_corrs):
            beta = rho[i] * np.dot(y[i], r)
            r = r + s[i] * (alpha[i] - beta)

        return r

    def _matmat(self, X):
        """Efficient matrix-matrix multiply with the BFGS matrices.

        This calculation is described in Section (4) of [1].

        Parameters
        ----------
        X : ndarray
            An array with shape (n, m).

        Returns
        -------
        Y : ndarray
            The matrix-matrix product.

        Notes
        -----
        This implementation is written starting from _matvec and broadcasting
        all expressions along the second axis of X.

        """
        s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
        Q = np.array(X, dtype=self.dtype, copy=True)

        alpha = np.empty((n_corrs, Q.shape[1]))

        for i in range(n_corrs-1, -1, -1):
            alpha[i] = rho[i] * np.dot(s[i], Q)
            Q -= alpha[i]*y[i][:, np.newaxis]

        R = Q
        for i in range(n_corrs):
            beta = rho[i] * np.dot(y[i], R)
            R += s[i][:, np.newaxis] * (alpha[i] - beta)

        return R

    def todense(self):
        """Return a dense array representation of this operator.

        Returns
        -------
        arr : ndarray, shape=(n, n)
            An array with the same shape and containing
            the same data represented by this `LinearOperator`.

        """
        I_arr = np.eye(*self.shape, dtype=self.dtype)
        return self._matmat(I_arr)
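
# Illustrative sketch: `res.hess_inv` returned by minimize(method='L-BFGS-B')
# is an LbfgsInvHessProduct; matvec applies the two-loop recursion above
# without ever forming the dense inverse Hessian.
import numpy as np
from scipy.optimize import minimize

def _f(x):
    return x[0]**2 + 10*x[1]**2   # Hessian H = diag(2, 20)

res = minimize(_f, np.array([3.0, -4.0]), method="L-BFGS-B")
v = np.array([1.0, 1.0])
print(res.hess_inv.matvec(v))   # approximately H^{-1} v = [0.5, 0.05]
print(res.hess_inv.todense())   # dense (n, n) reconstruction, fine for small n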
896
venv/lib/python3.13/site-packages/scipy/optimize/_linesearch.py
Normal file
@ -0,0 +1,896 @@
"""
|
||||
Functions
|
||||
---------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
line_search_armijo
|
||||
line_search_wolfe1
|
||||
line_search_wolfe2
|
||||
scalar_search_wolfe1
|
||||
scalar_search_wolfe2
|
||||
|
||||
"""
|
||||
from warnings import warn
|
||||
|
||||
from ._dcsrch import DCSRCH
|
||||
import numpy as np
|
||||
|
||||
__all__ = ['LineSearchWarning', 'line_search_wolfe1', 'line_search_wolfe2',
|
||||
'scalar_search_wolfe1', 'scalar_search_wolfe2',
|
||||
'line_search_armijo']
|
||||
|
||||
class LineSearchWarning(RuntimeWarning):
|
||||
pass
|
||||
|
||||
|
||||
def _check_c1_c2(c1, c2):
|
||||
if not (0 < c1 < c2 < 1):
|
||||
raise ValueError("'c1' and 'c2' do not satisfy"
|
||||
"'0 < c1 < c2 < 1'.")
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Minpack's Wolfe line and scalar searches
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
def line_search_wolfe1(f, fprime, xk, pk, gfk=None,
|
||||
old_fval=None, old_old_fval=None,
|
||||
args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8,
|
||||
xtol=1e-14):
|
||||
"""
|
||||
As `scalar_search_wolfe1` but do a line search to direction `pk`
|
||||
|
||||
Parameters
|
||||
----------
|
||||
f : callable
|
||||
Function `f(x)`
|
||||
fprime : callable
|
||||
Gradient of `f`
|
||||
xk : array_like
|
||||
Current point
|
||||
pk : array_like
|
||||
Search direction
|
||||
gfk : array_like, optional
|
||||
Gradient of `f` at point `xk`
|
||||
old_fval : float, optional
|
||||
Value of `f` at point `xk`
|
||||
old_old_fval : float, optional
|
||||
Value of `f` at point preceding `xk`
|
||||
|
||||
The rest of the parameters are the same as for `scalar_search_wolfe1`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
stp, f_count, g_count, fval, old_fval
|
||||
As in `line_search_wolfe1`
|
||||
gval : array
|
||||
Gradient of `f` at the final point
|
||||
|
||||
Notes
|
||||
-----
|
||||
Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1``.
|
||||
|
||||
"""
|
||||
if gfk is None:
|
||||
gfk = fprime(xk, *args)
|
||||
|
||||
gval = [gfk]
|
||||
gc = [0]
|
||||
fc = [0]
|
||||
|
||||
def phi(s):
|
||||
fc[0] += 1
|
||||
return f(xk + s*pk, *args)
|
||||
|
||||
def derphi(s):
|
||||
gval[0] = fprime(xk + s*pk, *args)
|
||||
gc[0] += 1
|
||||
return np.dot(gval[0], pk)
|
||||
|
||||
derphi0 = np.dot(gfk, pk)
|
||||
|
||||
stp, fval, old_fval = scalar_search_wolfe1(
|
||||
phi, derphi, old_fval, old_old_fval, derphi0,
|
||||
c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol)
|
||||
|
||||
return stp, fc[0], gc[0], fval, old_fval, gval[0]
|
||||
|
||||
|
||||
def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
|
||||
c1=1e-4, c2=0.9,
|
||||
amax=50, amin=1e-8, xtol=1e-14):
|
||||
"""
|
||||
Scalar function search for alpha that satisfies strong Wolfe conditions
|
||||
|
||||
alpha > 0 is assumed to be a descent direction.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
phi : callable phi(alpha)
|
||||
Function at point `alpha`
|
||||
derphi : callable phi'(alpha)
|
||||
Objective function derivative. Returns a scalar.
|
||||
phi0 : float, optional
|
||||
Value of phi at 0
|
||||
old_phi0 : float, optional
|
||||
Value of phi at previous point
|
||||
derphi0 : float, optional
|
||||
Value derphi at 0
|
||||
c1 : float, optional
|
||||
Parameter for Armijo condition rule.
|
||||
c2 : float, optional
|
||||
Parameter for curvature condition rule.
|
||||
amax, amin : float, optional
|
||||
Maximum and minimum step size
|
||||
xtol : float, optional
|
||||
Relative tolerance for an acceptable step.
|
||||
|
||||
Returns
|
||||
-------
|
||||
alpha : float
|
||||
Step size, or None if no suitable step was found
|
||||
phi : float
|
||||
Value of `phi` at the new point `alpha`
|
||||
phi0 : float
|
||||
Value of `phi` at `alpha=0`
|
||||
|
||||
Notes
|
||||
-----
|
||||
Uses routine DCSRCH from MINPACK.
|
||||
|
||||
Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1`` as described in [1]_.
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
.. [1] Nocedal, J., & Wright, S. J. (2006). Numerical optimization.
|
||||
In Springer Series in Operations Research and Financial Engineering.
|
||||
(Springer Series in Operations Research and Financial Engineering).
|
||||
Springer Nature.
|
||||
|
||||
"""
|
||||
_check_c1_c2(c1, c2)
|
||||
|
||||
if phi0 is None:
|
||||
phi0 = phi(0.)
|
||||
if derphi0 is None:
|
||||
derphi0 = derphi(0.)
|
||||
|
||||
if old_phi0 is not None and derphi0 != 0:
|
||||
alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
|
||||
if alpha1 < 0:
|
||||
alpha1 = 1.0
|
||||
else:
|
||||
alpha1 = 1.0
|
||||
|
||||
maxiter = 100
|
||||
|
||||
dcsrch = DCSRCH(phi, derphi, c1, c2, xtol, amin, amax)
|
||||
stp, phi1, phi0, task = dcsrch(
|
||||
alpha1, phi0=phi0, derphi0=derphi0, maxiter=maxiter
|
||||
)
|
||||
|
||||
return stp, phi1, phi0
|
||||
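
# Illustrative sketch on a 1-D quadratic, importing from the private module
# listed here (the module path is an assumption; the public entry point is
# scipy.optimize.line_search, defined below as line_search_wolfe1).
from scipy.optimize._linesearch import scalar_search_wolfe1

phi = lambda a: (a - 2.0)**2          # minimizer at a = 2, phi(0) = 4
derphi = lambda a: 2.0*(a - 2.0)      # derphi(0) = -4, a descent direction

stp, phi1, phi0 = scalar_search_wolfe1(phi, derphi)
print(stp, phi1, phi0)  # an acceptable Wolfe step (e.g. 1.0), phi(stp), 4.0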
|
||||
|
||||
line_search = line_search_wolfe1
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Pure-Python Wolfe line and scalar searches
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
# Note: `line_search_wolfe2` is the public `scipy.optimize.line_search`
|
||||
|
||||
def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None,
|
||||
old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=None,
|
||||
extra_condition=None, maxiter=10):
|
||||
"""Find alpha that satisfies strong Wolfe conditions.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
f : callable f(x,*args)
|
||||
Objective function.
|
||||
myfprime : callable f'(x,*args)
|
||||
Objective function gradient.
|
||||
xk : ndarray
|
||||
Starting point.
|
||||
pk : ndarray
|
||||
Search direction. The search direction must be a descent direction
|
||||
for the algorithm to converge.
|
||||
gfk : ndarray, optional
|
||||
Gradient value for x=xk (xk being the current parameter
|
||||
estimate). Will be recomputed if omitted.
|
||||
old_fval : float, optional
|
||||
Function value for x=xk. Will be recomputed if omitted.
|
||||
old_old_fval : float, optional
|
||||
Function value for the point preceding x=xk.
|
||||
args : tuple, optional
|
||||
Additional arguments passed to objective function.
|
||||
c1 : float, optional
|
||||
Parameter for Armijo condition rule.
|
||||
c2 : float, optional
|
||||
Parameter for curvature condition rule.
|
||||
amax : float, optional
|
||||
Maximum step size
|
||||
extra_condition : callable, optional
|
||||
A callable of the form ``extra_condition(alpha, x, f, g)``
|
||||
returning a boolean. Arguments are the proposed step ``alpha``
|
||||
and the corresponding ``x``, ``f`` and ``g`` values. The line search
|
||||
accepts the value of ``alpha`` only if this
|
||||
callable returns ``True``. If the callable returns ``False``
|
||||
for the step length, the algorithm will continue with
|
||||
new iterates. The callable is only called for iterates
|
||||
satisfying the strong Wolfe conditions.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations to perform.
|
||||
|
||||
Returns
|
||||
-------
|
||||
alpha : float or None
|
||||
Alpha for which ``x_new = x0 + alpha * pk``,
|
||||
or None if the line search algorithm did not converge.
|
||||
fc : int
|
||||
Number of function evaluations made.
|
||||
gc : int
|
||||
Number of gradient evaluations made.
|
||||
new_fval : float or None
|
||||
New function value ``f(x_new)=f(x0+alpha*pk)``,
|
||||
or None if the line search algorithm did not converge.
|
||||
old_fval : float
|
||||
Old function value ``f(x0)``.
|
||||
new_slope : float or None
|
||||
The local slope along the search direction at the
|
||||
new value ``<myfprime(x_new), pk>``,
|
||||
or None if the line search algorithm did not converge.
|
||||
|
||||
|
||||
Notes
|
||||
-----
|
||||
Uses the line search algorithm to enforce strong Wolfe
|
||||
conditions. See Wright and Nocedal, 'Numerical Optimization',
|
||||
1999, pp. 59-61.
|
||||
|
||||
The search direction `pk` must be a descent direction (e.g.
|
||||
``-myfprime(xk)``) to find a step length that satisfies the strong Wolfe
|
||||
conditions. If the search direction is not a descent direction (e.g.
|
||||
``myfprime(xk)``), then `alpha`, `new_fval`, and `new_slope` will be None.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import numpy as np
|
||||
>>> from scipy.optimize import line_search
|
||||
|
||||
A objective function and its gradient are defined.
|
||||
|
||||
>>> def obj_func(x):
|
||||
... return (x[0])**2+(x[1])**2
|
||||
>>> def obj_grad(x):
|
||||
... return [2*x[0], 2*x[1]]
|
||||
|
||||
We can find alpha that satisfies strong Wolfe conditions.
|
||||
|
||||
>>> start_point = np.array([1.8, 1.7])
|
||||
>>> search_gradient = np.array([-1.0, -1.0])
|
||||
>>> line_search(obj_func, obj_grad, start_point, search_gradient)
|
||||
(1.0, 2, 1, 1.1300000000000001, 6.13, [1.6, 1.4])
|
||||
|
||||
"""
|
||||
fc = [0]
|
||||
gc = [0]
|
||||
gval = [None]
|
||||
gval_alpha = [None]
|
||||
|
||||
def phi(alpha):
|
||||
fc[0] += 1
|
||||
return f(xk + alpha * pk, *args)
|
||||
|
||||
fprime = myfprime
|
||||
|
||||
def derphi(alpha):
|
||||
gc[0] += 1
|
||||
gval[0] = fprime(xk + alpha * pk, *args) # store for later use
|
||||
gval_alpha[0] = alpha
|
||||
return np.dot(gval[0], pk)
|
||||
|
||||
if gfk is None:
|
||||
gfk = fprime(xk, *args)
|
||||
derphi0 = np.dot(gfk, pk)
|
||||
|
||||
if extra_condition is not None:
|
||||
# Add the current gradient as argument, to avoid needless
|
||||
# re-evaluation
|
||||
def extra_condition2(alpha, phi):
|
||||
if gval_alpha[0] != alpha:
|
||||
derphi(alpha)
|
||||
x = xk + alpha * pk
|
||||
return extra_condition(alpha, x, phi, gval[0])
|
||||
else:
|
||||
extra_condition2 = None
|
||||
|
||||
alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2(
|
||||
phi, derphi, old_fval, old_old_fval, derphi0, c1, c2, amax,
|
||||
extra_condition2, maxiter=maxiter)
|
||||
|
||||
if derphi_star is None:
|
||||
warn('The line search algorithm did not converge',
|
||||
LineSearchWarning, stacklevel=2)
|
||||
else:
|
||||
# derphi_star is a number (derphi) -- so use the most recently
|
||||
# calculated gradient used in computing it derphi = gfk*pk
|
||||
# this is the gradient at the next step no need to compute it
|
||||
# again in the outer loop.
|
||||
derphi_star = gval[0]
|
||||
|
||||
return alpha_star, fc[0], gc[0], phi_star, old_fval, derphi_star


def scalar_search_wolfe2(phi, derphi, phi0=None,
                         old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9, amax=None,
                         extra_condition=None, maxiter=10):
    """Find alpha that satisfies strong Wolfe conditions.

    alpha > 0 is assumed to be a descent direction.

    Parameters
    ----------
    phi : callable phi(alpha)
        Objective scalar function.
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of phi at 0.
    old_phi0 : float, optional
        Value of phi at previous point.
    derphi0 : float, optional
        Value of derphi at 0.
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax : float, optional
        Maximum step size.
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, phi_value)``
        returning a boolean. The line search accepts the value
        of ``alpha`` only if this callable returns ``True``.
        If the callable returns ``False`` for the step length,
        the algorithm will continue with new iterates.
        The callable is only called for iterates satisfying
        the strong Wolfe conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha_star : float or None
        Best alpha, or None if the line search algorithm did not converge.
    phi_star : float
        phi at alpha_star.
    phi0 : float
        phi at 0.
    derphi_star : float or None
        derphi at alpha_star, or None if the line search algorithm
        did not converge.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.

    """
    _check_c1_c2(c1, c2)

    if phi0 is None:
        phi0 = phi(0.)

    if derphi0 is None:
        derphi0 = derphi(0.)

    alpha0 = 0
    if old_phi0 is not None and derphi0 != 0:
        alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
    else:
        alpha1 = 1.0

    if alpha1 < 0:
        alpha1 = 1.0

    if amax is not None:
        alpha1 = min(alpha1, amax)

    phi_a1 = phi(alpha1)
    # derphi_a1 = derphi(alpha1)  -- evaluated below

    phi_a0 = phi0
    derphi_a0 = derphi0

    if extra_condition is None:
        def extra_condition(alpha, phi):
            return True

    for i in range(maxiter):
        if alpha1 == 0 or (amax is not None and alpha0 > amax):
            # alpha1 == 0: This shouldn't happen. Perhaps the increment has
            # slipped below machine precision?
            alpha_star = None
            phi_star = phi0
            phi0 = old_phi0
            derphi_star = None

            if alpha1 == 0:
                msg = 'Rounding errors prevent the line search from converging'
            else:
                msg = "The line search algorithm could not find a solution " + \
                      f"less than or equal to amax: {amax}"

            warn(msg, LineSearchWarning, stacklevel=2)
            break

        not_first_iteration = i > 0
        if (phi_a1 > phi0 + c1 * alpha1 * derphi0) or \
           ((phi_a1 >= phi_a0) and not_first_iteration):
            alpha_star, phi_star, derphi_star = \
                        _zoom(alpha0, alpha1, phi_a0,
                              phi_a1, derphi_a0, phi, derphi,
                              phi0, derphi0, c1, c2, extra_condition)
            break

        derphi_a1 = derphi(alpha1)
        if (abs(derphi_a1) <= -c2*derphi0):
            if extra_condition(alpha1, phi_a1):
                alpha_star = alpha1
                phi_star = phi_a1
                derphi_star = derphi_a1
                break

        if (derphi_a1 >= 0):
            alpha_star, phi_star, derphi_star = \
                        _zoom(alpha1, alpha0, phi_a1,
                              phi_a0, derphi_a1, phi, derphi,
                              phi0, derphi0, c1, c2, extra_condition)
            break

        alpha2 = 2 * alpha1  # increase by factor of two on each iteration
        if amax is not None:
            alpha2 = min(alpha2, amax)
        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = phi(alpha1)
        derphi_a0 = derphi_a1

    else:
        # stopping test maxiter reached
        alpha_star = alpha1
        phi_star = phi_a1
        derphi_star = None
        warn('The line search algorithm did not converge',
             LineSearchWarning, stacklevel=2)

    return alpha_star, phi_star, phi0, derphi_star
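

# A minimal sketch (hypothetical helper) checking that the step returned by
# `scalar_search_wolfe2` on a simple 1-D quadratic satisfies both strong
# Wolfe conditions with the default c1 and c2.
def _demo_scalar_search_wolfe2(c1=1e-4, c2=0.9):
    def phi(a):
        return (a - 2.0) ** 2           # minimized at alpha = 2

    def derphi(a):
        return 2.0 * (a - 2.0)

    alpha, phi_a, phi0, derphi_a = scalar_search_wolfe2(phi, derphi)
    assert phi_a <= phi(0.) + c1 * alpha * derphi(0.)   # sufficient decrease
    assert abs(derphi_a) <= -c2 * derphi(0.)            # curvature condition
    return alpha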


def _cubicmin(a, fa, fpa, b, fb, c, fc):
    """
    Finds the minimizer for a cubic polynomial that goes through the
    points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa.

    If no minimizer can be found, return None.

    """
    # f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D

    with np.errstate(divide='raise', over='raise', invalid='raise'):
        try:
            C = fpa
            db = b - a
            dc = c - a
            denom = (db * dc) ** 2 * (db - dc)
            d1 = np.empty((2, 2))
            d1[0, 0] = dc ** 2
            d1[0, 1] = -db ** 2
            d1[1, 0] = -dc ** 3
            d1[1, 1] = db ** 3
            [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
                                            fc - fa - C * dc]).flatten())
            A /= denom
            B /= denom
            radical = B * B - 3 * A * C
            xmin = a + (-B + np.sqrt(radical)) / (3 * A)
        except ArithmeticError:
            return None
    if not np.isfinite(xmin):
        return None
    return xmin
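

# A quick numerical check (hypothetical helper): because the interpolant is
# fit exactly, `_cubicmin` recovers the interior minimizer x = 1 of the cubic
# f(x) = (x - 1)**2 * (x + 1) from three points and one derivative.
def _demo_cubicmin():
    def f(x):
        return (x - 1.0) ** 2 * (x + 1.0)

    def fp(x):
        return 3.0 * x ** 2 - 2.0 * x - 1.0    # f'(x)

    xmin = _cubicmin(-0.5, f(-0.5), fp(-0.5), 2.0, f(2.0), 0.5, f(0.5))
    assert abs(xmin - 1.0) < 1e-8
    return xmin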


def _quadmin(a, fa, fpa, b, fb):
    """
    Finds the minimizer for a quadratic polynomial that goes through
    the points (a,fa), (b,fb) with derivative at a of fpa.

    """
    # f(x) = B*(x-a)^2 + C*(x-a) + D
    with np.errstate(divide='raise', over='raise', invalid='raise'):
        try:
            D = fa
            C = fpa
            db = b - a * 1.0
            B = (fb - D - C * db) / (db * db)
            xmin = a - C / (2.0 * B)
        except ArithmeticError:
            return None
    if not np.isfinite(xmin):
        return None
    return xmin
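

# The analogous check (hypothetical helper) for the quadratic interpolant:
# for f(x) = (x - 3)**2 + 1 with f'(0) = -6, `_quadmin` returns exactly 3.
def _demo_quadmin():
    def f(x):
        return (x - 3.0) ** 2 + 1.0

    xmin = _quadmin(0.0, f(0.0), -6.0, 5.0, f(5.0))
    assert abs(xmin - 3.0) < 1e-12
    return xmin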


def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
          phi, derphi, phi0, derphi0, c1, c2, extra_condition):
    """Zoom stage of approximate linesearch satisfying strong Wolfe conditions.

    Part of the optimization algorithm in `scalar_search_wolfe2`.

    Notes
    -----
    Implements Algorithm 3.6 (zoom) in Wright and Nocedal,
    'Numerical Optimization', 1999, pp. 61.

    """

    maxiter = 10
    i = 0
    delta1 = 0.2  # cubic interpolant check
    delta2 = 0.1  # quadratic interpolant check
    phi_rec = phi0
    a_rec = 0
    while True:
        # Interpolate to find a trial step length between a_lo and a_hi.
        # We need to choose an interpolation method here: use cubic
        # interpolation first; if the result is within delta * dalpha of
        # an endpoint or outside the interval bounded by a_lo and a_hi,
        # fall back to quadratic interpolation; if that result is still
        # too close, use bisection.

        dalpha = a_hi - a_lo
        if dalpha < 0:
            a, b = a_hi, a_lo
        else:
            a, b = a_lo, a_hi

        # minimizer of cubic interpolant
        # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
        #
        # if the result is too close to the end points (or out of the
        # interval), then use quadratic interpolation with phi_lo,
        # derphi_lo and phi_hi; if the result is still too close to the
        # end points (or out of the interval), then use bisection

        if (i > 0):
            cchk = delta1 * dalpha
            a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi,
                            a_rec, phi_rec)
        if (i == 0) or (a_j is None) or (a_j > b - cchk) or (a_j < a + cchk):
            qchk = delta2 * dalpha
            a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
            if (a_j is None) or (a_j > b-qchk) or (a_j < a+qchk):
                a_j = a_lo + 0.5*dalpha

        # Check new value of a_j

        phi_aj = phi(a_j)
        if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
            phi_rec = phi_hi
            a_rec = a_hi
            a_hi = a_j
            phi_hi = phi_aj
        else:
            derphi_aj = derphi(a_j)
            if abs(derphi_aj) <= -c2*derphi0 and extra_condition(a_j, phi_aj):
                a_star = a_j
                val_star = phi_aj
                valprime_star = derphi_aj
                break
            if derphi_aj*(a_hi - a_lo) >= 0:
                phi_rec = phi_hi
                a_rec = a_hi
                a_hi = a_lo
                phi_hi = phi_lo
            else:
                phi_rec = phi_lo
                a_rec = a_lo
            a_lo = a_j
            phi_lo = phi_aj
            derphi_lo = derphi_aj
        i += 1
        if (i > maxiter):
            # Failed to find a conforming step size
            a_star = None
            val_star = None
            valprime_star = None
            break
    return a_star, val_star, valprime_star
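

# A compact sketch (hypothetical helper, simplified to assume a_lo < a_hi) of
# the interpolation cascade used in `_zoom`: try the cubic minimizer, fall
# back to the quadratic minimizer, and bisect if both land too near an edge.
def _demo_interpolation_cascade(a_lo, a_hi, phi, derphi, a_rec, phi_rec,
                                delta1=0.2, delta2=0.1):
    dalpha = a_hi - a_lo
    a_j = _cubicmin(a_lo, phi(a_lo), derphi(a_lo), a_hi, phi(a_hi),
                    a_rec, phi_rec)
    if (a_j is None or a_j > a_hi - delta1 * dalpha
            or a_j < a_lo + delta1 * dalpha):
        a_j = _quadmin(a_lo, phi(a_lo), derphi(a_lo), a_hi, phi(a_hi))
        if (a_j is None or a_j > a_hi - delta2 * dalpha
                or a_j < a_lo + delta2 * dalpha):
            a_j = a_lo + 0.5 * dalpha          # bisection fallback
    return a_j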


#------------------------------------------------------------------------------
# Armijo line and scalar searches
#------------------------------------------------------------------------------

def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """Minimize over alpha, the function ``f(xk+alpha pk)``.

    Parameters
    ----------
    f : callable
        Function to be minimized.
    xk : array_like
        Current point.
    pk : array_like
        Search direction.
    gfk : array_like
        Gradient of `f` at point `xk`.
    old_fval : float
        Value of `f` at point `xk`.
    args : tuple, optional
        Optional arguments.
    c1 : float, optional
        Value to control stopping criterion.
    alpha0 : scalar, optional
        Value of `alpha` at start of the optimization.

    Returns
    -------
    alpha
    f_count
    f_val_at_alpha

    Notes
    -----
    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57

    """
    xk = np.atleast_1d(xk)
    fc = [0]

    def phi(alpha1):
        fc[0] += 1
        return f(xk + alpha1*pk, *args)

    if old_fval is None:
        phi0 = phi(0.)
    else:
        phi0 = old_fval  # compute f(xk) -- done in past loop

    derphi0 = np.dot(gfk, pk)
    alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0, c1=c1,
                                       alpha0=alpha0)
    return alpha, fc[0], phi1


def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """
    Compatibility wrapper for `line_search_armijo`
    """
    r = line_search_armijo(f, xk, pk, gfk, old_fval, args=args, c1=c1,
                           alpha0=alpha0)
    return r[0], r[1], 0, r[2]


def scalar_search_armijo(phi, phi0, derphi0, c1=1e-4, alpha0=1, amin=0):
    """Minimize over alpha, the function ``phi(alpha)``.

    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57

    alpha > 0 is assumed to be a descent direction.

    Returns
    -------
    alpha
    phi1

    """
    phi_a0 = phi(alpha0)
    if phi_a0 <= phi0 + c1*alpha0*derphi0:
        return alpha0, phi_a0

    # Otherwise, compute the minimizer of a quadratic interpolant:

    alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
    phi_a1 = phi(alpha1)

    if (phi_a1 <= phi0 + c1*alpha1*derphi0):
        return alpha1, phi_a1

    # Otherwise, loop with cubic interpolation until we find an alpha which
    # satisfies the first Wolfe condition (since we are backtracking, we will
    # assume that the value of alpha is not too small and satisfies the second
    # condition).

    while alpha1 > amin:       # we are assuming alpha>0 is a descent direction
        factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
        a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
            alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
        a = a / factor
        b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
            alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
        b = b / factor

        alpha2 = (-b + np.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a)
        phi_a2 = phi(alpha2)

        if (phi_a2 <= phi0 + c1*alpha2*derphi0):
            return alpha2, phi_a2

        if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
            alpha2 = alpha1 / 2.0

        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = phi_a2

    # Failed to find a suitable step length
    return None, phi_a1
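

# A minimal sketch (hypothetical helper): backtracking from alpha0 = 1 on a
# steep 1-D quadratic; the returned step satisfies the sufficient-decrease
# (first Wolfe) condition used above.
def _demo_scalar_search_armijo(c1=1e-4):
    def phi(a):
        return 10.0 * (a - 0.1) ** 2       # minimized at alpha = 0.1

    phi0 = phi(0.)                          # 0.1
    derphi0 = -2.0                          # phi'(0) = 20*(0 - 0.1)
    alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0, c1=c1)
    assert alpha is not None
    assert phi1 <= phi0 + c1 * alpha * derphi0
    return alpha, phi1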


#------------------------------------------------------------------------------
# Non-monotone line search for DF-SANE
#------------------------------------------------------------------------------

def _nonmonotone_line_search_cruz(f, x_k, d, prev_fs, eta,
                                  gamma=1e-4, tau_min=0.1, tau_max=0.5):
    """
    Nonmonotone backtracking line search as described in [1]_

    Parameters
    ----------
    f : callable
        Function returning a tuple ``(f, F)`` where ``f`` is the value
        of a merit function and ``F`` the residual.
    x_k : ndarray
        Initial position.
    d : ndarray
        Search direction.
    prev_fs : list of float
        List of previous merit function values. Should have ``len(prev_fs) <= M``
        where ``M`` is the nonmonotonicity window parameter.
    eta : float
        Allowed merit function increase, see [1]_
    gamma, tau_min, tau_max : float, optional
        Search parameters, see [1]_

    Returns
    -------
    alpha : float
        Step length
    xp : ndarray
        Next position
    fp : float
        Merit function value at next position
    Fp : ndarray
        Residual at next position

    References
    ----------
    [1] "Spectral residual method without gradient information for solving
        large-scale nonlinear systems of equations." W. La Cruz,
        J.M. Martinez, M. Raydan. Math. Comp. **75**, 1429 (2006).

    """
    f_k = prev_fs[-1]
    f_bar = max(prev_fs)

    alpha_p = 1
    alpha_m = 1
    alpha = 1

    while True:
        xp = x_k + alpha_p * d
        fp, Fp = f(xp)

        if fp <= f_bar + eta - gamma * alpha_p**2 * f_k:
            alpha = alpha_p
            break

        alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)

        xp = x_k - alpha_m * d
        fp, Fp = f(xp)

        if fp <= f_bar + eta - gamma * alpha_m**2 * f_k:
            alpha = -alpha_m
            break

        alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)

        alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
        alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)

    return alpha, xp, fp, Fp
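

# A minimal sketch (hypothetical helper): the merit function below is the
# squared residual norm of the system F(x) = x, and the acceptance test
# compares against the maximum of the stored merit values, not the latest.
def _demo_nonmonotone_cruz():
    import numpy as np

    def f(x):
        F = x                   # residual of F(x) = 0, root at the origin
        return float(np.dot(F, F)), F

    x_k = np.array([1.0, -1.0])
    d = -x_k                    # step straight toward the root
    prev_fs = [f(x_k)[0]]
    alpha, xp, fp, Fp = _nonmonotone_line_search_cruz(f, x_k, d, prev_fs,
                                                      eta=1e-2)
    assert fp <= max(prev_fs) + 1e-2
    return alpha, fp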


def _nonmonotone_line_search_cheng(f, x_k, d, f_k, C, Q, eta,
                                   gamma=1e-4, tau_min=0.1, tau_max=0.5,
                                   nu=0.85):
    """
    Nonmonotone line search from [1]_

    Parameters
    ----------
    f : callable
        Function returning a tuple ``(f, F)`` where ``f`` is the value
        of a merit function and ``F`` the residual.
    x_k : ndarray
        Initial position.
    d : ndarray
        Search direction.
    f_k : float
        Initial merit function value.
    C, Q : float
        Control parameters. On the first iteration, give values
        Q=1.0, C=f_k
    eta : float
        Allowed merit function increase, see [1]_
    nu, gamma, tau_min, tau_max : float, optional
        Search parameters, see [1]_

    Returns
    -------
    alpha : float
        Step length
    xp : ndarray
        Next position
    fp : float
        Merit function value at next position
    Fp : ndarray
        Residual at next position
    C : float
        New value for the control parameter C
    Q : float
        New value for the control parameter Q

    References
    ----------
    .. [1] W. Cheng & D.-H. Li, ''A derivative-free nonmonotone line
           search and its application to the spectral residual
           method'', IMA J. Numer. Anal. 29, 814 (2009).

    """
    alpha_p = 1
    alpha_m = 1
    alpha = 1

    while True:
        xp = x_k + alpha_p * d
        fp, Fp = f(xp)

        if fp <= C + eta - gamma * alpha_p**2 * f_k:
            alpha = alpha_p
            break

        alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)

        xp = x_k - alpha_m * d
        fp, Fp = f(xp)

        if fp <= C + eta - gamma * alpha_m**2 * f_k:
            alpha = -alpha_m
            break

        alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)

        alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
        alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)

    # Update C and Q
    Q_next = nu * Q + 1
    C = (nu * Q * (C + eta) + fp) / Q_next
    Q = Q_next

    return alpha, xp, fp, Fp, C, Q
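

# A minimal sketch (hypothetical helper) of the Cheng & Li control sequence
# used above: C is a damped average of past merit values, advanced by
# Q_{k+1} = nu*Q_k + 1 and C_{k+1} = (nu*Q_k*(C_k + eta) + f_{k+1}) / Q_{k+1}.
def _demo_cheng_update(merits, eta=0.0, nu=0.85):
    C, Q = merits[0], 1.0       # first-iteration values recommended above
    for fp in merits[1:]:
        Q_next = nu * Q + 1
        C = (nu * Q * (C + eta) + fp) / Q_next
        Q = Q_next
    return C, Q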

733
venv/lib/python3.13/site-packages/scipy/optimize/_linprog.py
Normal file
@@ -0,0 +1,733 @@
"""
|
||||
A top-level linear programming interface.
|
||||
|
||||
.. versionadded:: 0.15.0
|
||||
|
||||
Functions
|
||||
---------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
linprog
|
||||
linprog_verbose_callback
|
||||
linprog_terse_callback
|
||||
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ._optimize import OptimizeResult, OptimizeWarning
|
||||
from warnings import warn
|
||||
from ._linprog_highs import _linprog_highs
|
||||
from ._linprog_ip import _linprog_ip
|
||||
from ._linprog_simplex import _linprog_simplex
|
||||
from ._linprog_rs import _linprog_rs
|
||||
from ._linprog_doc import (_linprog_highs_doc, _linprog_ip_doc, # noqa: F401
|
||||
_linprog_rs_doc, _linprog_simplex_doc,
|
||||
_linprog_highs_ipm_doc, _linprog_highs_ds_doc)
|
||||
from ._linprog_util import (
|
||||
_parse_linprog, _presolve, _get_Abc, _LPProblem, _autoscale,
|
||||
_postsolve, _check_result, _display_summary)
|
||||
from copy import deepcopy
|
||||
|
||||
__all__ = ['linprog', 'linprog_verbose_callback', 'linprog_terse_callback']
|
||||
|
||||
__docformat__ = "restructuredtext en"
|
||||
|
||||
LINPROG_METHODS = [
|
||||
'simplex', 'revised simplex', 'interior-point', 'highs', 'highs-ds', 'highs-ipm'
|
||||
]
|
||||
|
||||
|
||||
def linprog_verbose_callback(res):
    """
    A sample callback function demonstrating the linprog callback interface.
    This callback produces detailed output to sys.stdout before each iteration
    and after the final iteration of the simplex algorithm.

    Parameters
    ----------
    res : A `scipy.optimize.OptimizeResult` consisting of the following fields:

        x : 1-D array
            The independent variable vector which optimizes the linear
            programming problem.
        fun : float
            Value of the objective function.
        success : bool
            True if the algorithm succeeded in finding an optimal solution.
        slack : 1-D array
            The values of the slack variables. Each slack variable corresponds
            to an inequality constraint. If the slack is zero, then the
            corresponding constraint is active.
        con : 1-D array
            The (nominally zero) residuals of the equality constraints, that is,
            ``b - A_eq @ x``.
        phase : int
            The phase of the optimization being executed. In phase 1 a basic
            feasible solution is sought and the tableau ``T`` has an additional
            row representing an alternate objective function.
        status : int
            An integer representing the exit status of the optimization:

            ``0`` : Optimization terminated successfully

            ``1`` : Iteration limit reached

            ``2`` : Problem appears to be infeasible

            ``3`` : Problem appears to be unbounded

            ``4`` : Serious numerical difficulties encountered

        nit : int
            The number of iterations performed.
        message : str
            A string descriptor of the exit status of the optimization.
    """
    x = res['x']
    fun = res['fun']
    phase = res['phase']
    status = res['status']
    nit = res['nit']
    message = res['message']
    complete = res['complete']

    saved_printoptions = np.get_printoptions()
    np.set_printoptions(linewidth=500,
                        formatter={'float': lambda x: f"{x: 12.4f}"})
    if status:
        print('--------- Simplex Early Exit -------\n')
        print(f'The simplex method exited early with status {status:d}')
        print(message)
    elif complete:
        print('--------- Simplex Complete --------\n')
        print(f'Iterations required: {nit}')
    else:
        print(f'--------- Iteration {nit:d} ---------\n')

    if nit > 0:
        if phase == 1:
            print('Current Pseudo-Objective Value:')
        else:
            print('Current Objective Value:')
        print('f = ', fun)
        print()
        print('Current Solution Vector:')
        print('x = ', x)
        print()

    np.set_printoptions(**saved_printoptions)


def linprog_terse_callback(res):
    """
    A sample callback function demonstrating the linprog callback interface.
    This callback produces brief output to sys.stdout before each iteration
    and after the final iteration of the simplex algorithm.

    Parameters
    ----------
    res : A `scipy.optimize.OptimizeResult` consisting of the following fields:

        x : 1-D array
            The independent variable vector which optimizes the linear
            programming problem.
        fun : float
            Value of the objective function.
        success : bool
            True if the algorithm succeeded in finding an optimal solution.
        slack : 1-D array
            The values of the slack variables. Each slack variable corresponds
            to an inequality constraint. If the slack is zero, then the
            corresponding constraint is active.
        con : 1-D array
            The (nominally zero) residuals of the equality constraints, that is,
            ``b - A_eq @ x``.
        phase : int
            The phase of the optimization being executed. In phase 1 a basic
            feasible solution is sought and the tableau ``T`` has an additional
            row representing an alternate objective function.
        status : int
            An integer representing the exit status of the optimization:

            ``0`` : Optimization terminated successfully

            ``1`` : Iteration limit reached

            ``2`` : Problem appears to be infeasible

            ``3`` : Problem appears to be unbounded

            ``4`` : Serious numerical difficulties encountered

        nit : int
            The number of iterations performed.
        message : str
            A string descriptor of the exit status of the optimization.
    """
    nit = res['nit']
    x = res['x']

    if nit == 0:
        print("Iter: X:")
    print(f"{nit: <5d} ", end="")
    print(x)
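

# A minimal usage sketch (hypothetical helper): the legacy 'simplex' method
# implemented below still accepts a callback, so the terse callback above
# prints one line per iteration. (The HiGHS methods reject callbacks, and
# the legacy path emits a DeprecationWarning.)
def _demo_terse_callback():
    c = [-1, 4]
    A_ub = [[-3, 1], [1, 2]]
    b_ub = [6, 4]
    return linprog(c, A_ub=A_ub, b_ub=b_ub, method='simplex',
                   callback=linprog_terse_callback)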


def linprog(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
            bounds=(0, None), method='highs', callback=None,
            options=None, x0=None, integrality=None):
    r"""
    Linear programming: minimize a linear objective function subject to linear
    equality and inequality constraints.

    Linear programming solves problems of the following form:

    .. math::

        \min_x \ & c^T x \\
        \mbox{such that} \ & A_{ub} x \leq b_{ub},\\
        & A_{eq} x = b_{eq},\\
        & l \leq x \leq u ,

    where :math:`x` is a vector of decision variables; :math:`c`,
    :math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
    :math:`A_{ub}` and :math:`A_{eq}` are matrices.

    Alternatively, that's:

    - minimize ::

        c @ x

    - such that ::

        A_ub @ x <= b_ub
        A_eq @ x == b_eq
        lb <= x <= ub

    Note that by default ``lb = 0`` and ``ub = None``. Other bounds can be
    specified with ``bounds``.

    Parameters
    ----------
    c : 1-D array
        The coefficients of the linear objective function to be minimized.
    A_ub : 2-D array, optional
        The inequality constraint matrix. Each row of ``A_ub`` specifies the
        coefficients of a linear inequality constraint on ``x``.
    b_ub : 1-D array, optional
        The inequality constraint vector. Each element represents an
        upper bound on the corresponding value of ``A_ub @ x``.
    A_eq : 2-D array, optional
        The equality constraint matrix. Each row of ``A_eq`` specifies the
        coefficients of a linear equality constraint on ``x``.
    b_eq : 1-D array, optional
        The equality constraint vector. Each element of ``A_eq @ x`` must equal
        the corresponding element of ``b_eq``.
    bounds : sequence, optional
        A sequence of ``(min, max)`` pairs for each element in ``x``, defining
        the minimum and maximum values of that decision variable.
        If a single tuple ``(min, max)`` is provided, then ``min`` and ``max``
        will serve as bounds for all decision variables.
        Use ``None`` to indicate that there is no bound. For instance, the
        default bound ``(0, None)`` means that all decision variables are
        non-negative, and the pair ``(None, None)`` means no bounds at all,
        i.e. all variables are allowed to be any real.
    method : str, optional
        The algorithm used to solve the standard form problem.
        The following are supported.

        - :ref:`'highs' <optimize.linprog-highs>` (default)
        - :ref:`'highs-ds' <optimize.linprog-highs-ds>`
        - :ref:`'highs-ipm' <optimize.linprog-highs-ipm>`
        - :ref:`'interior-point' <optimize.linprog-interior-point>` (legacy)
        - :ref:`'revised simplex' <optimize.linprog-revised_simplex>` (legacy)
        - :ref:`'simplex' <optimize.linprog-simplex>` (legacy)

        The legacy methods are deprecated and will be removed in SciPy 1.11.0.
    callback : callable, optional
        If a callback function is provided, it will be called at least once per
        iteration of the algorithm. The callback function must accept a single
        `scipy.optimize.OptimizeResult` consisting of the following fields:

        x : 1-D array
            The current solution vector.
        fun : float
            The current value of the objective function ``c @ x``.
        success : bool
            ``True`` when the algorithm has completed successfully.
        slack : 1-D array
            The (nominally positive) values of the slack,
            ``b_ub - A_ub @ x``.
        con : 1-D array
            The (nominally zero) residuals of the equality constraints,
            ``b_eq - A_eq @ x``.
        phase : int
            The phase of the algorithm being executed.
        status : int
            An integer representing the status of the algorithm.

            ``0`` : Optimization proceeding nominally.

            ``1`` : Iteration limit reached.

            ``2`` : Problem appears to be infeasible.

            ``3`` : Problem appears to be unbounded.

            ``4`` : Numerical difficulties encountered.

        nit : int
            The current iteration number.
        message : str
            A string descriptor of the algorithm status.

        Callback functions are not currently supported by the HiGHS methods.

    options : dict, optional
        A dictionary of solver options. All methods accept the following
        options:

        maxiter : int
            Maximum number of iterations to perform.
            Default: see method-specific documentation.
        disp : bool
            Set to ``True`` to print convergence messages.
            Default: ``False``.
        presolve : bool
            Set to ``False`` to disable automatic presolve.
            Default: ``True``.

        All methods except the HiGHS solvers also accept:

        tol : float
            A tolerance which determines when a residual is "close enough" to
            zero to be considered exactly zero.
        autoscale : bool
            Set to ``True`` to automatically perform equilibration.
            Consider using this option if the numerical values in the
            constraints are separated by several orders of magnitude.
            Default: ``False``.
        rr : bool
            Set to ``False`` to disable automatic redundancy removal.
            Default: ``True``.
        rr_method : string
            Method used to identify and remove redundant rows from the
            equality constraint matrix after presolve. For problems with
            dense input, the available methods for redundancy removal are:

            ``SVD``:
                Repeatedly performs singular value decomposition on
                the matrix, detecting redundant rows based on nonzeros
                in the left singular vectors that correspond with
                zero singular values. May be fast when the matrix is
                nearly full rank.
            ``pivot``:
                Uses the algorithm presented in [5]_ to identify
                redundant rows.
            ``ID``:
                Uses a randomized interpolative decomposition.
                Identifies columns of the matrix transpose not used in
                a full-rank interpolative decomposition of the matrix.
            ``None``:
                Uses ``svd`` if the matrix is nearly full rank, that is,
                the difference between the matrix rank and the number
                of rows is less than five. If not, uses ``pivot``. The
                behavior of this default is subject to change without
                prior notice.

            Default: None.
            For problems with sparse input, this option is ignored, and the
            pivot-based algorithm presented in [5]_ is used.

        For method-specific options, see
        :func:`show_options('linprog') <show_options>`.

    x0 : 1-D array, optional
        Guess values of the decision variables, which will be refined by
        the optimization algorithm. This argument is currently used only by the
        :ref:`'revised simplex' <optimize.linprog-revised_simplex>` method,
        and can only be used if `x0` represents a basic feasible solution.

    integrality : 1-D array or int, optional
        Indicates the type of integrality constraint on each decision variable.

        ``0`` : Continuous variable; no integrality constraint.

        ``1`` : Integer variable; decision variable must be an integer
        within `bounds`.

        ``2`` : Semi-continuous variable; decision variable must be within
        `bounds` or take value ``0``.

        ``3`` : Semi-integer variable; decision variable must be an integer
        within `bounds` or take value ``0``.

        By default, all variables are continuous.

        For mixed integrality constraints, supply an array of shape ``c.shape``.
        To infer a constraint on each decision variable from shorter inputs,
        the argument will be broadcast to ``c.shape`` using `numpy.broadcast_to`.

        This argument is currently used only by the
        :ref:`'highs' <optimize.linprog-highs>` method and is ignored otherwise.

    Returns
    -------
    res : OptimizeResult
        A :class:`scipy.optimize.OptimizeResult` consisting of the fields
        below. Note that the return types of the fields may depend on whether
        the optimization was successful, therefore it is recommended to check
        `OptimizeResult.status` before relying on the other fields:

        x : 1-D array
            The values of the decision variables that minimize the
            objective function while satisfying the constraints.
        fun : float
            The optimal value of the objective function ``c @ x``.
        slack : 1-D array
            The (nominally positive) values of the slack variables,
            ``b_ub - A_ub @ x``.
        con : 1-D array
            The (nominally zero) residuals of the equality constraints,
            ``b_eq - A_eq @ x``.
        success : bool
            ``True`` when the algorithm succeeds in finding an optimal
            solution.
        status : int
            An integer representing the exit status of the algorithm.

            ``0`` : Optimization terminated successfully.

            ``1`` : Iteration limit reached.

            ``2`` : Problem appears to be infeasible.

            ``3`` : Problem appears to be unbounded.

            ``4`` : Numerical difficulties encountered.

        nit : int
            The total number of iterations performed in all phases.
        message : str
            A string descriptor of the exit status of the algorithm.

    See Also
    --------
    show_options : Additional options accepted by the solvers.

    Notes
    -----
    This section describes the available solvers that can be selected by the
    'method' parameter.

    :ref:`'highs-ds' <optimize.linprog-highs-ds>` and
    :ref:`'highs-ipm' <optimize.linprog-highs-ipm>` are interfaces to the
    HiGHS simplex and interior-point method solvers [13]_, respectively.
    :ref:`'highs' <optimize.linprog-highs>` (default) chooses between
    the two automatically. These are the fastest linear
    programming solvers in SciPy, especially for large, sparse problems;
    which of these two is faster is problem-dependent.
    The other solvers are legacy methods and will be removed when `callback` is
    supported by the HiGHS methods.

    Method :ref:`'highs-ds' <optimize.linprog-highs-ds>` is a wrapper of the C++
    high performance dual revised simplex implementation (HSOL) [13]_, [14]_.
    Method :ref:`'highs-ipm' <optimize.linprog-highs-ipm>` is a wrapper of a C++
    implementation of an **i**\ nterior-\ **p**\ oint **m**\ ethod [13]_; it
    features a crossover routine, so it is as accurate as a simplex solver.
    Method :ref:`'highs' <optimize.linprog-highs>` chooses between the two
    automatically.
    For new code involving `linprog`, we recommend explicitly choosing one of
    these three method values.

    .. versionadded:: 1.6.0

    Method :ref:`'interior-point' <optimize.linprog-interior-point>`
    uses the primal-dual path following algorithm
    as outlined in [4]_. This algorithm supports sparse constraint matrices and
    is typically faster than the simplex methods, especially for large, sparse
    problems. Note, however, that the solution returned may be slightly less
    accurate than those of the simplex methods and will not, in general,
    correspond with a vertex of the polytope defined by the constraints.

    .. versionadded:: 1.0.0

    Method :ref:`'revised simplex' <optimize.linprog-revised_simplex>`
    uses the revised simplex method as described in
    [9]_, except that a factorization [11]_ of the basis matrix, rather than
    its inverse, is efficiently maintained and used to solve the linear systems
    at each iteration of the algorithm.

    .. versionadded:: 1.3.0

    Method :ref:`'simplex' <optimize.linprog-simplex>` uses a traditional,
    full-tableau implementation of
    Dantzig's simplex algorithm [1]_, [2]_ (*not* the
    Nelder-Mead simplex). This algorithm is included for backwards
    compatibility and educational purposes.

    .. versionadded:: 0.15.0

    Before applying :ref:`'interior-point' <optimize.linprog-interior-point>`,
    :ref:`'revised simplex' <optimize.linprog-revised_simplex>`, or
    :ref:`'simplex' <optimize.linprog-simplex>`,
    a presolve procedure based on [8]_ attempts
    to identify trivial infeasibilities, trivial unboundedness, and potential
    problem simplifications. Specifically, it checks for:

    - rows of zeros in ``A_eq`` or ``A_ub``, representing trivial constraints;
    - columns of zeros in ``A_eq`` `and` ``A_ub``, representing unconstrained
      variables;
    - column singletons in ``A_eq``, representing fixed variables; and
    - column singletons in ``A_ub``, representing simple bounds.

    If presolve reveals that the problem is unbounded (e.g. an unconstrained
    and unbounded variable has negative cost) or infeasible (e.g., a row of
    zeros in ``A_eq`` corresponds with a nonzero in ``b_eq``), the solver
    terminates with the appropriate status code. Note that presolve terminates
    as soon as any sign of unboundedness is detected; consequently, a problem
    may be reported as unbounded when in reality the problem is infeasible
    (but infeasibility has not been detected yet). Therefore, if it is
    important to know whether the problem is actually infeasible, solve the
    problem again with option ``presolve=False``.

    If neither infeasibility nor unboundedness is detected in a single pass
    of the presolve, bounds are tightened where possible and fixed
    variables are removed from the problem. Then, linearly dependent rows
    of the ``A_eq`` matrix are removed (unless they represent an
    infeasibility) to avoid numerical difficulties in the primary solve
    routine. Note that rows that are nearly linearly dependent (within a
    prescribed tolerance) may also be removed, which can change the optimal
    solution in rare cases. If this is a concern, eliminate redundancy from
    your problem formulation and run with option ``rr=False`` or
    ``presolve=False``.

    Several potential improvements can be made here: additional presolve
    checks outlined in [8]_ should be implemented, the presolve routine should
    be run multiple times (until no further simplifications can be made), and
    more of the efficiency improvements from [5]_ should be implemented in the
    redundancy removal routines.

    After presolve, the problem is transformed to standard form by converting
    the (tightened) simple bounds to upper bound constraints, introducing
    non-negative slack variables for inequality constraints, and expressing
    unbounded variables as the difference between two non-negative variables.
    Optionally, the problem is automatically scaled via equilibration [12]_.
    The selected algorithm solves the standard form problem, and a
    postprocessing routine converts the result to a solution to the original
    problem.

    References
    ----------
    .. [1] Dantzig, George B., Linear programming and extensions. Rand
           Corporation Research Study Princeton Univ. Press, Princeton, NJ,
           1963
    .. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
           Mathematical Programming", McGraw-Hill, Chapter 4.
    .. [3] Bland, Robert G. New finite pivoting rules for the simplex method.
           Mathematics of Operations Research (2), 1977: pp. 103-107.
    .. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
           optimizer for linear programming: an implementation of the
           homogeneous algorithm." High performance optimization. Springer US,
           2000. 197-232.
    .. [5] Andersen, Erling D. "Finding all linearly dependent rows in
           large-scale linear programming." Optimization Methods and Software
           6.3 (1995): 219-227.
    .. [6] Freund, Robert M. "Primal-Dual Interior-Point Methods for Linear
           Programming based on Newton's Method." Unpublished Course Notes,
           March 2004. Available 2/25/2017 at
           https://ocw.mit.edu/courses/sloan-school-of-management/15-084j-nonlinear-programming-spring-2004/lecture-notes/lec14_int_pt_mthd.pdf
    .. [7] Fourer, Robert. "Solving Linear Programs by Interior-Point Methods."
           Unpublished Course Notes, August 26, 2005. Available 2/25/2017 at
           http://www.4er.org/CourseNotes/Book%20B/B-III.pdf
    .. [8] Andersen, Erling D., and Knud D. Andersen. "Presolving in linear
           programming." Mathematical Programming 71.2 (1995): 221-245.
    .. [9] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
           programming." Athena Scientific 1 (1997): 997.
    .. [10] Andersen, Erling D., et al. Implementation of interior point
            methods for large scale linear programming. HEC/Universite de
            Geneve, 1996.
    .. [11] Bartels, Richard H. "A stabilization of the simplex method."
            Journal in Numerische Mathematik 16.5 (1971): 414-434.
    .. [12] Tomlin, J. A. "On scaling linear programming problems."
            Mathematical Programming Study 4 (1975): 146-166.
    .. [13] Huangfu, Q., Galabova, I., Feldmeier, M., and Hall, J. A. J.
            "HiGHS - high performance software for linear optimization."
            https://highs.dev/
    .. [14] Huangfu, Q. and Hall, J. A. J. "Parallelizing the dual revised
            simplex method." Mathematical Programming Computation, 10 (1),
            119-142, 2018. DOI: 10.1007/s12532-017-0130-5

    Examples
    --------
    Consider the following problem:

    .. math::

        \min_{x_0, x_1} \ -x_0 + 4x_1 & \\
        \mbox{such that} \ -3x_0 + x_1 & \leq 6,\\
        -x_0 - 2x_1 & \geq -4,\\
        x_1 & \geq -3.

    The problem is not presented in the form accepted by `linprog`. This is
    easily remedied by converting the "greater than" inequality
    constraint to a "less than" inequality constraint by
    multiplying both sides by a factor of :math:`-1`. Note also that the last
    constraint is really the simple bound :math:`-3 \leq x_1 \leq \infty`.
    Finally, since there are no bounds on :math:`x_0`, we must explicitly
    specify the bounds :math:`-\infty \leq x_0 \leq \infty`, as the
    default is for variables to be non-negative. After collecting coefficients
    into arrays and tuples, the input for this problem is:

    >>> from scipy.optimize import linprog
    >>> c = [-1, 4]
    >>> A = [[-3, 1], [1, 2]]
    >>> b = [6, 4]
    >>> x0_bounds = (None, None)
    >>> x1_bounds = (-3, None)
    >>> res = linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds])
    >>> res.fun
    -22.0
    >>> res.x
    array([10., -3.])
    >>> res.message
    'Optimization terminated successfully. (HiGHS Status 7: Optimal)'

    The marginals (AKA dual values / shadow prices / Lagrange multipliers)
    and residuals (slacks) are also available.

    >>> res.ineqlin
      residual: [ 3.900e+01  0.000e+00]
     marginals: [-0.000e+00 -1.000e+00]

    For example, because the marginal associated with the second inequality
    constraint is -1, we expect the optimal value of the objective function
    to decrease by ``eps`` if we add a small amount ``eps`` to the right hand
    side of the second inequality constraint:

    >>> eps = 0.05
    >>> b[1] += eps
    >>> linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds]).fun
    -22.05

    Also, because the residual on the first inequality constraint is 39, we
    can decrease the right hand side of the first constraint by 39 without
    affecting the optimal solution.

    >>> b = [6, 4]  # reset to original values
    >>> b[0] -= 39
    >>> linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds]).fun
    -22.0

    """

    meth = method.lower()
    methods = {"highs", "highs-ds", "highs-ipm",
               "simplex", "revised simplex", "interior-point"}

    if meth not in methods:
        raise ValueError(f"Unknown solver '{method}'")

    if x0 is not None and meth != "revised simplex":
        warning_message = "x0 is used only when method is 'revised simplex'. "
        warn(warning_message, OptimizeWarning, stacklevel=2)

    if np.any(integrality) and not meth == "highs":
        integrality = None
        warning_message = ("Only `method='highs'` supports integer "
                           "constraints. Ignoring `integrality`.")
        warn(warning_message, OptimizeWarning, stacklevel=2)
    elif np.any(integrality):
        integrality = np.broadcast_to(integrality, np.shape(c))
    else:
        integrality = None

    lp = _LPProblem(c, A_ub, b_ub, A_eq, b_eq, bounds, x0, integrality)
    lp, solver_options = _parse_linprog(lp, options, meth)
    tol = solver_options.get('tol', 1e-9)

    # Give unmodified problem to HiGHS
    if meth.startswith('highs'):
        if callback is not None:
            raise NotImplementedError("HiGHS solvers do not support the "
                                      "callback interface.")
        highs_solvers = {'highs-ipm': 'ipm', 'highs-ds': 'simplex',
                         'highs': None}

        sol = _linprog_highs(lp, solver=highs_solvers[meth],
                             **solver_options)
        sol['status'], sol['message'] = (
            _check_result(sol['x'], sol['fun'], sol['status'], sol['slack'],
                          sol['con'], lp.bounds, tol, sol['message'],
                          integrality))
        sol['success'] = sol['status'] == 0
        return OptimizeResult(sol)

    warn(f"`method='{meth}'` is deprecated and will be removed in SciPy "
         "1.11.0. Please use one of the HiGHS solvers (e.g. "
         "`method='highs'`) in new code.", DeprecationWarning, stacklevel=2)

    iteration = 0
    complete = False  # will become True if solved in presolve
    undo = []

    # Keep the original arrays to calculate slack/residuals for original
    # problem.
    lp_o = deepcopy(lp)

    # Solve trivial problem, eliminate variables, tighten bounds, etc.
    rr_method = solver_options.pop('rr_method', None)  # need to pop these;
    rr = solver_options.pop('rr', True)  # they're not passed to methods
    c0 = 0  # we might get a constant term in the objective
    if solver_options.pop('presolve', True):
        (lp, c0, x, undo, complete, status, message) = _presolve(lp, rr,
                                                                 rr_method,
                                                                 tol)

    C, b_scale = 1, 1  # for trivial unscaling if autoscale is not used
    postsolve_args = (lp_o._replace(bounds=lp.bounds), undo, C, b_scale)

    if not complete:
        A, b, c, c0, x0 = _get_Abc(lp, c0)
        if solver_options.pop('autoscale', False):
            A, b, c, x0, C, b_scale = _autoscale(A, b, c, x0)
            postsolve_args = postsolve_args[:-2] + (C, b_scale)

        if meth == 'simplex':
            x, status, message, iteration = _linprog_simplex(
                c, c0=c0, A=A, b=b, callback=callback,
                postsolve_args=postsolve_args, **solver_options)
        elif meth == 'interior-point':
            x, status, message, iteration = _linprog_ip(
                c, c0=c0, A=A, b=b, callback=callback,
                postsolve_args=postsolve_args, **solver_options)
        elif meth == 'revised simplex':
            x, status, message, iteration = _linprog_rs(
                c, c0=c0, A=A, b=b, x0=x0, callback=callback,
                postsolve_args=postsolve_args, **solver_options)

    # Eliminate artificial variables, re-introduce presolved variables, etc.
    disp = solver_options.get('disp', False)

    x, fun, slack, con = _postsolve(x, postsolve_args, complete)

    status, message = _check_result(x, fun, status, slack, con, lp_o.bounds,
                                    tol, message, integrality)

    if disp:
        _display_summary(message, status, fun, iteration)

    sol = {
        'x': x,
        'fun': fun,
        'slack': slack,
        'con': con,
        'status': status,
        'message': message,
        'nit': iteration,
        'success': status == 0}

    return OptimizeResult(sol)
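

# A short end-to-end sketch (hypothetical helper): a tiny mixed-integer
# program solved through the HiGHS interface via the `integrality` argument.
def _demo_linprog_milp():
    c = [-1, -2]                    # maximize x0 + 2*x1
    A_ub = [[1, 1]]
    b_ub = [3.5]
    res = linprog(c, A_ub=A_ub, b_ub=b_ub, integrality=[1, 1])
    # With both variables integer and non-negative, the optimum is
    # x = [0, 3] with fun = -6.0.
    return res.x, res.fun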

1434
venv/lib/python3.13/site-packages/scipy/optimize/_linprog_doc.py
Normal file
File diff suppressed because it is too large
@@ -0,0 +1,422 @@
"""HiGHS Linear Optimization Methods
|
||||
|
||||
Interface to HiGHS linear optimization software.
|
||||
https://highs.dev/
|
||||
|
||||
.. versionadded:: 1.5.0
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Q. Huangfu and J.A.J. Hall. "Parallelizing the dual revised simplex
|
||||
method." Mathematical Programming Computation, 10 (1), 119-142,
|
||||
2018. DOI: 10.1007/s12532-017-0130-5
|
||||
|
||||
"""
|
||||
|
||||
import inspect
|
||||
import numpy as np
|
||||
from ._optimize import OptimizeWarning, OptimizeResult
|
||||
from warnings import warn
|
||||
from ._highspy._highs_wrapper import _highs_wrapper
|
||||
from ._highspy._core import(
|
||||
kHighsInf,
|
||||
HighsDebugLevel,
|
||||
ObjSense,
|
||||
HighsModelStatus,
|
||||
simplex_constants as s_c, # [1]
|
||||
)
|
||||
from scipy.sparse import csc_array, vstack, issparse
|
||||
|
||||
# [1]: Directly importing from "._highspy._core.simplex_constants"
|
||||
# causes problems when reloading.
|
||||
# See https://github.com/scipy/scipy/pull/22869 for details.
|
||||
|
||||
def _highs_to_scipy_status_message(highs_status, highs_message):
    """Converts HiGHS status number/message to SciPy status number/message"""

    scipy_statuses_messages = {
        None: (4, "HiGHS did not provide a status code. "),
        HighsModelStatus.kNotset: (4, ""),
        HighsModelStatus.kLoadError: (4, ""),
        HighsModelStatus.kModelError: (2, ""),
        HighsModelStatus.kPresolveError: (4, ""),
        HighsModelStatus.kSolveError: (4, ""),
        HighsModelStatus.kPostsolveError: (4, ""),
        HighsModelStatus.kModelEmpty: (4, ""),
        HighsModelStatus.kObjectiveBound: (4, ""),
        HighsModelStatus.kObjectiveTarget: (4, ""),
        HighsModelStatus.kOptimal: (0, "Optimization terminated successfully. "),
        HighsModelStatus.kTimeLimit: (1, "Time limit reached. "),
        HighsModelStatus.kIterationLimit: (1, "Iteration limit reached. "),
        HighsModelStatus.kInfeasible: (2, "The problem is infeasible. "),
        HighsModelStatus.kUnbounded: (3, "The problem is unbounded. "),
        HighsModelStatus.kUnboundedOrInfeasible: (4, "The problem is unbounded "
                                                     "or infeasible. ")}
    unrecognized = (4, "The HiGHS status code was not recognized. ")
    scipy_status, scipy_message = (
        scipy_statuses_messages.get(highs_status, unrecognized))
    hstat = int(highs_status) if highs_status is not None else None
    scipy_message = (f"{scipy_message}"
                     f"(HiGHS Status {hstat}: {highs_message})")
    return scipy_status, scipy_message


def _replace_inf(x):
    # Replace `np.inf` with kHighsInf
    infs = np.isinf(x)
    with np.errstate(invalid="ignore"):
        x[infs] = np.sign(x[infs])*kHighsInf
    return x
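

# A quick sketch (hypothetical helper): bounds arrays routinely contain
# np.inf, which `_replace_inf` re-encodes as +/-kHighsInf (the HiGHS
# infinity constant imported above) before the problem is handed off.
def _demo_replace_inf():
    import numpy as np
    bounds = np.array([0.0, -np.inf, np.inf])
    out = _replace_inf(bounds.copy())   # copy: _replace_inf mutates in place
    assert out[1] == -kHighsInf and out[2] == kHighsInf
    return out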


def _convert_to_highs_enum(option, option_str, choices):
    # If option is in the choices we can look it up, if not use
    # the default value taken from function signature and warn:
    try:
        return choices[option.lower()]
    except AttributeError:
        return choices[option]
    except KeyError:
        sig = inspect.signature(_linprog_highs)
        default_str = sig.parameters[option_str].default
        warn(f"Option {option_str} is {option}, but only values in "
             f"{set(choices.keys())} are allowed. Using default: "
             f"{default_str}.",
             OptimizeWarning, stacklevel=3)
        return choices[default_str]
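

# A minimal sketch (hypothetical helper) of the lookup-with-fallback pattern
# above, with a plain dictionary standing in for the HiGHS enum values.
def _demo_convert_to_highs_enum():
    choices = {'dantzig': 0, 'devex': 1, None: 2}
    picked = _convert_to_highs_enum('DEVEX',
                                    'simplex_dual_edge_weight_strategy',
                                    choices)
    assert picked == 1    # matched case-insensitively via option.lower()
    return picked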
|
||||
|
||||
|
||||
def _linprog_highs(lp, solver, time_limit=None, presolve=True,
|
||||
disp=False, maxiter=None,
|
||||
dual_feasibility_tolerance=None,
|
||||
primal_feasibility_tolerance=None,
|
||||
ipm_optimality_tolerance=None,
|
||||
simplex_dual_edge_weight_strategy=None,
|
||||
mip_rel_gap=None,
|
||||
mip_max_nodes=None,
|
||||
**unknown_options):
|
||||
r"""
|
||||
Solve the following linear programming problem using one of the HiGHS
|
||||
solvers:
|
||||
|
||||
User-facing documentation is in _linprog_doc.py.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
lp : _LPProblem
|
||||
A ``scipy.optimize._linprog_util._LPProblem`` ``namedtuple``.
|
||||
solver : "ipm" or "simplex" or None
|
||||
Which HiGHS solver to use. If ``None``, "simplex" will be used.
|
||||
|
||||
Options
|
||||
-------
|
||||
maxiter : int
|
||||
The maximum number of iterations to perform in either phase. For
|
||||
``solver='ipm'``, this does not include the number of crossover
|
||||
iterations. Default is the largest possible value for an ``int``
|
||||
on the platform.
|
||||
disp : bool
|
||||
Set to ``True`` if indicators of optimization status are to be printed
|
||||
to the console each iteration; default ``False``.
|
||||
time_limit : float
|
||||
The maximum time in seconds allotted to solve the problem; default is
|
||||
the largest possible value for a ``double`` on the platform.
|
||||
presolve : bool
|
||||
Presolve attempts to identify trivial infeasibilities,
|
||||
identify trivial unboundedness, and simplify the problem before
|
||||
sending it to the main solver. It is generally recommended
|
||||
to keep the default setting ``True``; set to ``False`` if presolve is
|
||||
to be disabled.
|
||||
dual_feasibility_tolerance : double
|
||||
Dual feasibility tolerance. Default is 1e-07.
|
||||
The minimum of this and ``primal_feasibility_tolerance``
|
||||
is used for the feasibility tolerance when ``solver='ipm'``.
|
||||
primal_feasibility_tolerance : double
|
||||
Primal feasibility tolerance. Default is 1e-07.
|
||||
The minimum of this and ``dual_feasibility_tolerance``
|
||||
is used for the feasibility tolerance when ``solver='ipm'``.
|
||||
ipm_optimality_tolerance : double
|
||||
Optimality tolerance for ``solver='ipm'``. Default is 1e-08.
|
||||
Minimum possible value is 1e-12 and must be smaller than the largest
|
||||
possible value for a ``double`` on the platform.
|
||||
simplex_dual_edge_weight_strategy : str (default: None)
|
||||
Strategy for simplex dual edge weights. The default, ``None``,
|
||||
automatically selects one of the following.
|
||||
|
||||
``'dantzig'`` uses Dantzig's original strategy of choosing the most
|
||||
negative reduced cost.
|
||||
|
||||
``'devex'`` uses the strategy described in [15]_.
|
||||
|
||||
``steepest`` uses the exact steepest edge strategy as described in
|
||||
[16]_.
|
||||
|
||||
``'steepest-devex'`` begins with the exact steepest edge strategy
|
||||
until the computation is too costly or inexact and then switches to
|
||||
the devex method.
|
||||
|
||||
Currently, using ``None`` always selects ``'steepest-devex'``, but this
|
||||
may change as new options become available.
|
||||
|
||||
mip_max_nodes : int
|
||||
The maximum number of nodes allotted to solve the problem; default is
|
||||
the largest possible value for a ``HighsInt`` on the platform.
|
||||
Ignored if not using the MIP solver.
|
||||
unknown_options : dict
|
||||
Optional arguments not used by this particular solver. If
|
||||
``unknown_options`` is non-empty, a warning is issued listing all
|
||||
unused options.
|
||||
|
||||
    Returns
    -------
    sol : dict
        A dictionary consisting of the fields:

            x : 1D array
                The values of the decision variables that minimize the
                objective function while satisfying the constraints.
            fun : float
                The optimal value of the objective function ``c @ x``.
            slack : 1D array
                The (nominally positive) values of the slack,
                ``b_ub - A_ub @ x``.
            con : 1D array
                The (nominally zero) residuals of the equality constraints,
                ``b_eq - A_eq @ x``.
            success : bool
                ``True`` when the algorithm succeeds in finding an optimal
                solution.
            status : int
                An integer representing the exit status of the algorithm.

                ``0`` : Optimization terminated successfully.

                ``1`` : Iteration or time limit reached.

                ``2`` : Problem appears to be infeasible.

                ``3`` : Problem appears to be unbounded.

                ``4`` : The HiGHS solver ran into a problem.

            message : str
                A string descriptor of the exit status of the algorithm.
            nit : int
                The total number of iterations performed.
                For ``solver='simplex'``, this includes iterations in all
                phases. For ``solver='ipm'``, this does not include
                crossover iterations.
            crossover_nit : int
                The number of primal/dual pushes performed during the
                crossover routine for ``solver='ipm'``. This is ``0``
                for ``solver='simplex'``.
            ineqlin : OptimizeResult
                Solution and sensitivity information corresponding to the
                inequality constraints, `b_ub`. A dictionary consisting of
                the fields:

                residual : np.ndarray
                    The (nominally positive) values of the slack variables,
                    ``b_ub - A_ub @ x``. This quantity is also commonly
                    referred to as "slack".

                marginals : np.ndarray
                    The sensitivity (partial derivative) of the objective
                    function with respect to the right-hand side of the
                    inequality constraints, `b_ub`.

            eqlin : OptimizeResult
                Solution and sensitivity information corresponding to the
                equality constraints, `b_eq`. A dictionary consisting of the
                fields:

                residual : np.ndarray
                    The (nominally zero) residuals of the equality
                    constraints, ``b_eq - A_eq @ x``.

                marginals : np.ndarray
                    The sensitivity (partial derivative) of the objective
                    function with respect to the right-hand side of the
                    equality constraints, `b_eq`.

            lower, upper : OptimizeResult
                Solution and sensitivity information corresponding to the
                lower and upper bounds on decision variables, `bounds`.

                residual : np.ndarray
                    The (nominally positive) values of the quantity
                    ``x - lb`` (lower) or ``ub - x`` (upper).

                marginals : np.ndarray
                    The sensitivity (partial derivative) of the objective
                    function with respect to the lower and upper
                    `bounds`.

            mip_node_count : int
                The number of subproblems or "nodes" solved by the MILP
                solver. Only present when `integrality` is not `None`.

            mip_dual_bound : float
                The MILP solver's final estimate of the lower bound on the
                optimal solution. Only present when `integrality` is not
                `None`.

            mip_gap : float
                The difference between the final objective function value
                and the final dual bound, scaled by the final objective
                function value. Only present when `integrality` is not
                `None`.

    Notes
    -----
    The result fields `ineqlin`, `eqlin`, `lower`, and `upper` all contain
    `marginals`, or partial derivatives of the objective function with respect
    to the right-hand side of each constraint. These partial derivatives are
    also referred to as "Lagrange multipliers", "dual values", and
    "shadow prices". The sign convention of `marginals` is opposite that
    of Lagrange multipliers produced by many nonlinear solvers.

    References
    ----------
    .. [15] Harris, Paula MJ. "Pivot selection methods of the Devex LP code."
            Mathematical Programming 5.1 (1973): 1-28.
    .. [16] Goldfarb, Donald, and John Ker Reid. "A practicable steepest-edge
            simplex algorithm." Mathematical Programming 12.1 (1977): 361-371.
    """
    if unknown_options:
        message = (f"Unrecognized options detected: {unknown_options}. "
                   "These will be passed to HiGHS verbatim.")
        warn(message, OptimizeWarning, stacklevel=3)

    # Map options to HiGHS enum values
    simplex_dual_edge_weight_strategy_enum = _convert_to_highs_enum(
        simplex_dual_edge_weight_strategy,
        'simplex_dual_edge_weight_strategy',
        choices={'dantzig':
                 s_c.SimplexEdgeWeightStrategy.kSimplexEdgeWeightStrategyDantzig,
                 'devex':
                 s_c.SimplexEdgeWeightStrategy.kSimplexEdgeWeightStrategyDevex,
                 'steepest-devex':
                 s_c.SimplexEdgeWeightStrategy.kSimplexEdgeWeightStrategyChoose,
                 'steepest':
                 s_c.SimplexEdgeWeightStrategy.kSimplexEdgeWeightStrategySteepestEdge,
                 None: None})

    c, A_ub, b_ub, A_eq, b_eq, bounds, x0, integrality = lp

    lb, ub = bounds.T.copy()  # separate bounds; copy -> C-contiguous
    # highs_wrapper solves LHS <= A*x <= RHS, not equality constraints
    with np.errstate(invalid="ignore"):
        lhs_ub = -np.ones_like(b_ub)*np.inf  # LHS of UB constraints is -inf
    rhs_ub = b_ub  # RHS of UB constraints is b_ub
    lhs_eq = b_eq  # Equality constraint is inequality
    rhs_eq = b_eq  # constraint with LHS=RHS
    lhs = np.concatenate((lhs_ub, lhs_eq))
    rhs = np.concatenate((rhs_ub, rhs_eq))

    if issparse(A_ub) or issparse(A_eq):
        A = vstack((A_ub, A_eq))
    else:
        A = np.vstack((A_ub, A_eq))
    A = csc_array(A)

    options = {
        'presolve': presolve,
        'sense': ObjSense.kMinimize,
        'solver': solver,
        'time_limit': time_limit,
        'highs_debug_level': HighsDebugLevel.kHighsDebugLevelNone,
        'dual_feasibility_tolerance': dual_feasibility_tolerance,
        'ipm_optimality_tolerance': ipm_optimality_tolerance,
        'log_to_console': disp,
        'mip_max_nodes': mip_max_nodes,
        'output_flag': disp,
        'primal_feasibility_tolerance': primal_feasibility_tolerance,
        'simplex_dual_edge_weight_strategy':
            simplex_dual_edge_weight_strategy_enum,
        'simplex_strategy': s_c.SimplexStrategy.kSimplexStrategyDual,
        'ipm_iteration_limit': maxiter,
        'simplex_iteration_limit': maxiter,
        'mip_rel_gap': mip_rel_gap,
    }
    options.update(unknown_options)

    # np.inf doesn't work; use very large constant
    rhs = _replace_inf(rhs)
    lhs = _replace_inf(lhs)
    lb = _replace_inf(lb)
    ub = _replace_inf(ub)

    if integrality is None or np.sum(integrality) == 0:
        integrality = np.empty(0)
    else:
        integrality = np.array(integrality)

    res = _highs_wrapper(c, A.indptr, A.indices, A.data, lhs, rhs,
                         lb, ub, integrality.astype(np.uint8), options)

    # HiGHS represents constraints as lhs/rhs, so
    # Ax + s = b => Ax = b - s
    # and we need to split up s by A_ub and A_eq
    if 'slack' in res:
        slack = res['slack']
        con = np.array(slack[len(b_ub):])
        slack = np.array(slack[:len(b_ub)])
    else:
        slack, con = None, None

    # lagrange multipliers for equalities/inequalities and upper/lower bounds
    if 'lambda' in res:
        lamda = res['lambda']
        marg_ineqlin = np.array(lamda[:len(b_ub)])
        marg_eqlin = np.array(lamda[len(b_ub):])
        marg_upper = np.array(res['marg_bnds'][1, :])
        marg_lower = np.array(res['marg_bnds'][0, :])
    else:
        marg_ineqlin, marg_eqlin = None, None
        marg_upper, marg_lower = None, None

    # this needs to be updated if we start choosing the solver intelligently

    # Convert to scipy-style status and message
    highs_status = res.get('status', None)
    highs_message = res.get('message', None)
    status, message = _highs_to_scipy_status_message(highs_status,
                                                     highs_message)

    x = res['x']  # is None if not set
    sol = {'x': x,
           'slack': slack,
           'con': con,
           'ineqlin': OptimizeResult({
               'residual': slack,
               'marginals': marg_ineqlin,
           }),
           'eqlin': OptimizeResult({
               'residual': con,
               'marginals': marg_eqlin,
           }),
           'lower': OptimizeResult({
               'residual': None if x is None else x - lb,
               'marginals': marg_lower,
           }),
           'upper': OptimizeResult({
               'residual': None if x is None else ub - x,
               'marginals': marg_upper
           }),
           'fun': res.get('fun'),
           'status': status,
           'success': res['status'] == HighsModelStatus.kOptimal,
           'message': message,
           'nit': res.get('simplex_nit', 0) or res.get('ipm_nit', 0),
           'crossover_nit': res.get('crossover_nit'),
           }

    if np.any(x) and integrality is not None:
        sol.update({
            'mip_node_count': res.get('mip_node_count', 0),
            'mip_dual_bound': res.get('mip_dual_bound', 0.0),
            'mip_gap': res.get('mip_gap', 0.0),
        })

    return sol
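
For reference, the `marginals` documented above surface through the public API. Here is a minimal, illustrative sketch (the numbers are made up) of solving a small LP with the HiGHS backend and reading the shadow prices of the inequality constraints:

import numpy as np
from scipy.optimize import linprog

c = np.array([-1.0, -2.0])       # maximize x0 + 2*x1 via minimization
A_ub = np.array([[1.0, 1.0],     # x0 +   x1 <= 4
                 [1.0, 3.0]])    # x0 + 3*x1 <= 6
b_ub = np.array([4.0, 6.0])

res = linprog(c, A_ub=A_ub, b_ub=b_ub, method="highs")
print(res.fun)                   # optimal objective, -5.0 at x = (3, 1)
print(res.ineqlin.marginals)     # shadow prices of the two inequalities
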
1141
venv/lib/python3.13/site-packages/scipy/optimize/_linprog_ip.py
Normal file
File diff suppressed because it is too large
572
venv/lib/python3.13/site-packages/scipy/optimize/_linprog_rs.py
Normal file
@@ -0,0 +1,572 @@
"""Revised simplex method for linear programming
|
||||
|
||||
The *revised simplex* method uses the method described in [1]_, except
|
||||
that a factorization [2]_ of the basis matrix, rather than its inverse,
|
||||
is efficiently maintained and used to solve the linear systems at each
|
||||
iteration of the algorithm.
|
||||
|
||||
.. versionadded:: 1.3.0
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
|
||||
programming." Athena Scientific 1 (1997): 997.
|
||||
.. [2] Bartels, Richard H. "A stabilization of the simplex method."
|
||||
Journal in Numerische Mathematik 16.5 (1971): 414-434.
|
||||
|
||||
"""
|
||||
# Author: Matt Haberland
|
||||
|
||||
import numpy as np
|
||||
from numpy.linalg import LinAlgError
|
||||
|
||||
from scipy.linalg import solve
|
||||
from ._optimize import _check_unknown_options
|
||||
from ._bglu_dense import LU
|
||||
from ._bglu_dense import BGLU as BGLU
|
||||
from ._linprog_util import _postsolve
|
||||
from ._optimize import OptimizeResult
|
||||
|
||||
|
||||
def _phase_one(A, b, x0, callback, postsolve_args, maxiter, tol, disp,
               maxupdate, mast, pivot):
    """
    The purpose of phase one is to find an initial basic feasible solution
    (BFS) to the original problem.

    Generates an auxiliary problem with a trivial BFS and an objective that
    minimizes infeasibility of the original problem. Solves the auxiliary
    problem using the main simplex routine (phase two). This either yields
    a BFS to the original problem or determines that the original problem is
    infeasible. If feasible, phase one detects redundant rows in the original
    constraint matrix and removes them, then chooses additional indices as
    necessary to complete a basis/BFS for the original problem.
    """

    m, n = A.shape
    status = 0

    # generate auxiliary problem to get initial BFS
    A, b, c, basis, x, status = _generate_auxiliary_problem(A, b, x0, tol)

    if status == 6:
        residual = c.dot(x)
        iter_k = 0
        return x, basis, A, b, residual, status, iter_k

    # solve auxiliary problem
    phase_one_n = n
    iter_k = 0
    x, basis, status, iter_k = _phase_two(c, A, x, basis, callback,
                                          postsolve_args,
                                          maxiter, tol, disp,
                                          maxupdate, mast, pivot,
                                          iter_k, phase_one_n)

    # check for infeasibility
    residual = c.dot(x)
    if status == 0 and residual > tol:
        status = 2

    # drive artificial variables out of basis
    # TODO: test redundant row removal better
    # TODO: make solve more efficient with BGLU? This could take a while.
    keep_rows = np.ones(m, dtype=bool)
    for basis_column in basis[basis >= n]:
        B = A[:, basis]
        try:
            basis_finder = np.abs(solve(B, A))  # inefficient
            pertinent_row = np.argmax(basis_finder[:, basis_column])
            eligible_columns = np.ones(n, dtype=bool)
            eligible_columns[basis[basis < n]] = 0
            eligible_column_indices = np.where(eligible_columns)[0]
            index = np.argmax(basis_finder[:, :n]
                              [pertinent_row, eligible_columns])
            new_basis_column = eligible_column_indices[index]
            if basis_finder[pertinent_row, new_basis_column] < tol:
                keep_rows[pertinent_row] = False
            else:
                basis[basis == basis_column] = new_basis_column
        except LinAlgError:
            status = 4

    # form solution to original problem
    A = A[keep_rows, :n]
    basis = basis[keep_rows]
    x = x[:n]
    m = A.shape[0]
    return x, basis, A, b, residual, status, iter_k


def _get_more_basis_columns(A, basis):
    """
    Called when the auxiliary problem terminates with artificial columns in
    the basis, which must be removed and replaced with non-artificial
    columns. Finds additional columns that do not make the matrix singular.
    """
    m, n = A.shape

    # options for inclusion are those that aren't already in the basis
    a = np.arange(m+n)
    bl = np.zeros(len(a), dtype=bool)
    bl[basis] = 1
    options = a[~bl]
    options = options[options < n]  # and they have to be non-artificial

    # form basis matrix
    B = np.zeros((m, m))
    B[:, 0:len(basis)] = A[:, basis]

    if (basis.size > 0 and
            np.linalg.matrix_rank(B[:, :len(basis)]) < len(basis)):
        raise Exception("Basis has dependent columns")

    rank = 0  # just enter the loop
    for i in range(n):  # somewhat arbitrary, but we need another way out
        # permute the options, and take as many as needed
        new_basis = np.random.permutation(options)[:m-len(basis)]
        B[:, len(basis):] = A[:, new_basis]  # update the basis matrix
        rank = np.linalg.matrix_rank(B)      # check the rank
        if rank == m:
            break

    return np.concatenate((basis, new_basis))


def _generate_auxiliary_problem(A, b, x0, tol):
    """
    Modifies original problem to create an auxiliary problem with a trivial
    initial basic feasible solution and an objective that minimizes
    infeasibility in the original problem.

    Conceptually, this is done by stacking an identity matrix on the right of
    the original constraint matrix, adding artificial variables to correspond
    with each of these new columns, and generating a cost vector that is all
    zeros except for ones corresponding with each of the new variables.

    An initial basic feasible solution is trivial: all variables are zero
    except for the artificial variables, which are set equal to the
    corresponding element of the right hand side `b`.

    Running the simplex method on this auxiliary problem drives all of the
    artificial variables - and thus the cost - to zero if the original problem
    is feasible. The original problem is declared infeasible otherwise.

    Much of the complexity below is to improve efficiency by using singleton
    columns in the original problem where possible, thus generating artificial
    variables only as necessary, and using an initial 'guess' basic feasible
    solution.
    """
    status = 0
    m, n = A.shape

    if x0 is not None:
        x = x0
    else:
        x = np.zeros(n)

    r = b - A@x  # residual; this must be all zeros for feasibility

    A[r < 0] = -A[r < 0]  # express problem with RHS positive for trivial BFS
    b[r < 0] = -b[r < 0]  # to the auxiliary problem
    r[r < 0] *= -1

    # Rows which we will need to find a trivial way to zero.
    # This should just be the rows where there is a nonzero residual.
    # But then we would not necessarily have a column singleton in every row.
    # This makes it difficult to find an initial basis.
    if x0 is None:
        nonzero_constraints = np.arange(m)
    else:
        nonzero_constraints = np.where(r > tol)[0]

    # these are (at least some of) the initial basis columns
    basis = np.where(np.abs(x) > tol)[0]

    if len(nonzero_constraints) == 0 and len(basis) <= m:  # already a BFS
        c = np.zeros(n)
        basis = _get_more_basis_columns(A, basis)
        return A, b, c, basis, x, status
    elif (len(nonzero_constraints) > m - len(basis) or
          np.any(x < 0)):  # can't get trivial BFS
        c = np.zeros(n)
        status = 6
        return A, b, c, basis, x, status

    # chooses existing columns appropriate for inclusion in initial basis
    cols, rows = _select_singleton_columns(A, r)

    # find the rows we need to zero that we _can_ zero with column singletons
    i_tofix = np.isin(rows, nonzero_constraints)
    # these columns can't already be in the basis, though
    # we are going to add them to the basis and change the corresponding x val
    i_notinbasis = np.logical_not(np.isin(cols, basis))
    i_fix_without_aux = np.logical_and(i_tofix, i_notinbasis)
    rows = rows[i_fix_without_aux]
    cols = cols[i_fix_without_aux]

    # indices of the rows we can only zero with auxiliary variable
    # these rows will get a one in each auxiliary column
    arows = nonzero_constraints[np.logical_not(
        np.isin(nonzero_constraints, rows))]
    n_aux = len(arows)
    acols = n + np.arange(n_aux)  # indices of auxiliary columns

    basis_ng = np.concatenate((cols, acols))       # basis columns not from guess
    basis_ng_rows = np.concatenate((rows, arows))  # rows we need to zero

    # add auxiliary singleton columns
    A = np.hstack((A, np.zeros((m, n_aux))))
    A[arows, acols] = 1

    # generate initial BFS
    x = np.concatenate((x, np.zeros(n_aux)))
    x[basis_ng] = r[basis_ng_rows]/A[basis_ng_rows, basis_ng]

    # generate costs to minimize infeasibility
    c = np.zeros(n_aux + n)
    c[acols] = 1

    # basis columns correspond with nonzeros in guess, those with column
    # singletons we used to zero remaining constraints, and any additional
    # columns to get a full set (m columns)
    basis = np.concatenate((basis, basis_ng))
    basis = _get_more_basis_columns(A, basis)  # add columns as needed

    return A, b, c, basis, x, status


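# Illustrative aside (not part of the scipy source): the construction above,
# in miniature, for a 2x2 system with x0=None. Stacking [A | I] and costing
# only the artificial variables makes x = 0, s = b a trivial BFS whose cost
# measures the infeasibility of the original constraints.
#
# >>> import numpy as np
# >>> A = np.array([[1., 2.], [3., 1.]]); b = np.array([4., 5.])
# >>> m, n = A.shape
# >>> A_aux = np.hstack((A, np.eye(m)))                  # [A | I]
# >>> c_aux = np.concatenate((np.zeros(n), np.ones(m)))  # cost only artificials
# >>> x_aux = np.concatenate((np.zeros(n), b))           # trivial BFS
# >>> bool(np.allclose(A_aux @ x_aux, b))
# True
# >>> float(c_aux @ x_aux)                               # infeasibility of x = 0
# 9.0

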
def _select_singleton_columns(A, b):
    """
    Finds singleton columns for which the singleton entry is of the same sign
    as the right-hand side; these columns are eligible for inclusion in an
    initial basis. Determines the rows in which the singleton entries are
    located. For each of these rows, returns the indices of the one singleton
    column and its corresponding row.
    """
    # find indices of all singleton columns and corresponding row indices
    column_indices = np.nonzero(np.sum(np.abs(A) != 0, axis=0) == 1)[0]
    columns = A[:, column_indices]  # array of singleton columns
    row_indices = np.zeros(len(column_indices), dtype=int)
    nonzero_rows, nonzero_columns = np.nonzero(columns)
    row_indices[nonzero_columns] = nonzero_rows  # corresponding row indices

    # keep only singletons with entries that have same sign as RHS
    # this is necessary because all elements of BFS must be non-negative
    same_sign = A[row_indices, column_indices]*b[row_indices] >= 0
    column_indices = column_indices[same_sign][::-1]
    row_indices = row_indices[same_sign][::-1]
    # Reversing the order so that steps below select rightmost columns
    # for initial basis, which will tend to be slack variables. (If the
    # guess corresponds with a basic feasible solution but a constraint
    # is not satisfied with the corresponding slack variable zero, the slack
    # variable must be basic.)

    # for each row, keep rightmost singleton column with an entry in that row
    unique_row_indices, first_columns = np.unique(row_indices,
                                                  return_index=True)
    return column_indices[first_columns], unique_row_indices


def _find_nonzero_rows(A, tol):
    """
    Returns logical array indicating the locations of rows with at least
    one nonzero element.
    """
    return np.any(np.abs(A) > tol, axis=1)


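# Illustrative aside (not part of the scipy source): the singleton-column
# detection performed above, on a small matrix where columns 1 and 2 each
# have exactly one nonzero entry.
#
# >>> import numpy as np
# >>> A = np.array([[1., 2., 0.],
# ...               [3., 0., 5.]])
# >>> np.nonzero(np.sum(np.abs(A) != 0, axis=0) == 1)[0]
# array([1, 2])

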
def _select_enter_pivot(c_hat, bl, a, rule="bland", tol=1e-12):
    """
    Selects a pivot to enter the basis. Currently Bland's rule - the smallest
    index that has a negative reduced cost - is the default.
    """
    if rule.lower() == "mrc":  # index with minimum reduced cost
        return a[~bl][np.argmin(c_hat)]
    else:  # smallest index w/ negative reduced cost
        return a[~bl][c_hat < -tol][0]


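# Illustrative aside (not part of the scipy source): the two pivot rules on
# made-up reduced costs. Columns 0, 2, 3 are nonbasic with reduced costs
# 0.5, -0.2, -1.0; Bland's rule picks the smallest eligible index, "mrc"
# the most negative reduced cost.
#
# >>> import numpy as np
# >>> a = np.arange(5)
# >>> bl = np.array([0, 1, 0, 0, 1], dtype=bool)   # columns 1 and 4 are basic
# >>> c_hat = np.array([0.5, -0.2, -1.0])          # costs of columns 0, 2, 3
# >>> tol = 1e-12
# >>> int(a[~bl][c_hat < -tol][0])                 # Bland's rule
# 2
# >>> int(a[~bl][np.argmin(c_hat)])                # minimum reduced cost
# 3

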
def _display_iter(phase, iteration, slack, con, fun):
    """
    Print indicators of optimization status to the console.
    """
    header = iteration % 20 == 0

    if header:
        print("Phase",
              "Iteration",
              "Minimum Slack ",
              "Constraint Residual",
              "Objective ")

    # :<X.Y left aligns Y digits in X digit spaces
    fmt = '{0:<6}{1:<10}{2:<20.13}{3:<20.13}{4:<20.13}'
    try:
        slack = np.min(slack)
    except ValueError:
        slack = "NA"
    print(fmt.format(phase, iteration, slack, np.linalg.norm(con), fun))


def _display_and_callback(phase_one_n, x, postsolve_args, status,
                          iteration, disp, callback):
    if phase_one_n is not None:
        phase = 1
        x_postsolve = x[:phase_one_n]
    else:
        phase = 2
        x_postsolve = x
    x_o, fun, slack, con = _postsolve(x_postsolve,
                                      postsolve_args)

    if callback is not None:
        res = OptimizeResult({'x': x_o, 'fun': fun, 'slack': slack,
                              'con': con, 'nit': iteration,
                              'phase': phase, 'complete': False,
                              'status': status, 'message': "",
                              'success': False})
        callback(res)
    if disp:
        _display_iter(phase, iteration, slack, con, fun)


def _phase_two(c, A, x, b, callback, postsolve_args, maxiter, tol, disp,
               maxupdate, mast, pivot, iteration=0, phase_one_n=None):
    """
    The heart of the simplex method. Beginning with a basic feasible solution,
    moves to adjacent basic feasible solutions with successively lower reduced
    cost. Terminates when there are no basic feasible solutions with lower
    reduced cost or if the problem is determined to be unbounded.

    This implementation follows the revised simplex method based on LU
    decomposition. Rather than maintaining a tableau or an inverse of the
    basis matrix, we keep a factorization of the basis matrix that allows
    efficient solution of linear systems while avoiding stability issues
    associated with inverted matrices.
    """
    m, n = A.shape
    status = 0
    a = np.arange(n)   # indices of columns of A
    ab = np.arange(m)  # indices of columns of B
    if maxupdate:
        # basis matrix factorization object; similar to B = A[:, b]
        B = BGLU(A, b, maxupdate, mast)
    else:
        B = LU(A, b)

    for iteration in range(iteration, maxiter):

        if disp or callback is not None:
            _display_and_callback(phase_one_n, x, postsolve_args, status,
                                  iteration, disp, callback)

        bl = np.zeros(len(a), dtype=bool)
        bl[b] = 1

        xb = x[b]  # basic variables
        cb = c[b]  # basic costs

        try:
            v = B.solve(cb, transposed=True)  # similar to v = solve(B.T, cb)
        except LinAlgError:
            status = 4
            break

        # TODO: cythonize?
        c_hat = c - v.dot(A)  # reduced cost
        c_hat = c_hat[~bl]
        # Above is much faster than:
        # N = A[:, ~bl]  # slow!
        # c_hat = c[~bl] - v.T.dot(N)
        # Can we perform the multiplication only on the nonbasic columns?

        if np.all(c_hat >= -tol):  # all reduced costs positive -> terminate
            break

        j = _select_enter_pivot(c_hat, bl, a, rule=pivot, tol=tol)
        u = B.solve(A[:, j])  # similar to u = solve(B, A[:, j])

        i = u > tol  # if none of the u are positive, unbounded
        if not np.any(i):
            status = 3
            break

        th = xb[i]/u[i]
        l = np.argmin(th)  # implicitly selects smallest subscript
        th_star = th[l]    # step size

        x[b] = x[b] - th_star*u  # take step
        x[j] = th_star
        B.update(ab[i][l], j)  # modify basis
        b = B.b  # similar to b[ab[i][l]] = j

    else:
        # If the end of the for loop is reached (without a break statement),
        # then another step has been taken, so the iteration counter should
        # increment, info should be displayed, and callback should be called.
        iteration += 1
        status = 1
        if disp or callback is not None:
            _display_and_callback(phase_one_n, x, postsolve_args, status,
                                  iteration, disp, callback)

    return x, b, status, iteration


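# Illustrative aside (not part of the scipy source): the ratio test from the
# loop above on made-up numbers. Only rows with u > 0 limit the step; ties
# are broken by the smallest subscript via argmin.
#
# >>> import numpy as np
# >>> xb = np.array([4., 2., 6.])    # current basic variable values
# >>> u = np.array([2., -1., 3.])    # direction column, B^-1 A[:, j]
# >>> i = u > 1e-12
# >>> th = xb[i] / u[i]              # candidate steps: [2., 2.]
# >>> float(th[np.argmin(th)])       # step size th_star
# 2.0

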
def _linprog_rs(c, c0, A, b, x0, callback, postsolve_args,
                maxiter=5000, tol=1e-12, disp=False,
                maxupdate=10, mast=False, pivot="mrc",
                **unknown_options):
    """
    Solve the following linear programming problem via a two-phase
    revised simplex algorithm.::

        minimize:     c @ x

        subject to:  A @ x == b
                     0 <= x < oo

    User-facing documentation is in _linprog_doc.py.

    Parameters
    ----------
    c : 1-D array
        Coefficients of the linear objective function to be minimized.
    c0 : float
        Constant term in objective function due to fixed (and eliminated)
        variables. (Currently unused.)
    A : 2-D array
        2-D array which, when matrix-multiplied by ``x``, gives the values of
        the equality constraints at ``x``.
    b : 1-D array
        1-D array of values representing the RHS of each equality constraint
        (row) in ``A_eq``.
    x0 : 1-D array, optional
        Starting values of the independent variables, which will be refined by
        the optimization algorithm. For the revised simplex method, these must
        correspond with a basic feasible solution.
    callback : callable, optional
        If a callback function is provided, it will be called within each
        iteration of the algorithm. The callback function must accept a single
        `scipy.optimize.OptimizeResult` consisting of the following fields:

            x : 1-D array
                Current solution vector.
            fun : float
                Current value of the objective function ``c @ x``.
            success : bool
                True only when an algorithm has completed successfully,
                so this is always False as the callback function is called
                only while the algorithm is still iterating.
            slack : 1-D array
                The values of the slack variables. Each slack variable
                corresponds to an inequality constraint. If the slack is zero,
                the corresponding constraint is active.
            con : 1-D array
                The (nominally zero) residuals of the equality constraints,
                that is, ``b - A_eq @ x``.
            phase : int
                The phase of the algorithm being executed.
            status : int
                For revised simplex, this is always 0 because if a different
                status is detected, the algorithm terminates.
            nit : int
                The number of iterations performed.
            message : str
                A string descriptor of the exit status of the optimization.
    postsolve_args : tuple
        Data needed by _postsolve to convert the solution to the standard-form
        problem into the solution to the original problem.

    Options
    -------
    maxiter : int
        The maximum number of iterations to perform in either phase.
    tol : float
        The tolerance which determines when a solution is "close enough" to
        zero in Phase 1 to be considered a basic feasible solution or close
        enough to positive to serve as an optimal solution.
    disp : bool
        Set to ``True`` if indicators of optimization status are to be printed
        to the console each iteration.
    maxupdate : int
        The maximum number of updates performed on the LU factorization.
        After this many updates is reached, the basis matrix is factorized
        from scratch.
    mast : bool
        Minimize Amortized Solve Time. If enabled, the average time to solve
        a linear system using the basis factorization is measured. Typically,
        the average solve time will decrease with each successive solve after
        initial factorization, as factorization takes much more time than the
        solve operation (and updates). Eventually, however, the updated
        factorization becomes sufficiently complex that the average solve time
        begins to increase. When this is detected, the basis is refactorized
        from scratch. Enable this option to maximize speed at the risk of
        nondeterministic behavior. Ignored if ``maxupdate`` is 0.
    pivot : "mrc" or "bland"
        Pivot rule: Minimum Reduced Cost (default) or Bland's rule. Choose
        Bland's rule if iteration limit is reached and cycling is suspected.
    unknown_options : dict
        Optional arguments not used by this particular solver. If
        `unknown_options` is non-empty a warning is issued listing all
        unused options.

    Returns
    -------
    x : 1-D array
        Solution vector.
    status : int
        An integer representing the exit status of the optimization::

            0 : Optimization terminated successfully
            1 : Iteration limit reached
            2 : Problem appears to be infeasible
            3 : Problem appears to be unbounded
            4 : Numerical difficulties encountered
            5 : No constraints; turn presolve on
            6 : Guess x0 cannot be converted to a basic feasible solution

    message : str
        A string descriptor of the exit status of the optimization.
    iteration : int
        The number of iterations taken to solve the problem.
    """

    _check_unknown_options(unknown_options)

    messages = ["Optimization terminated successfully.",
                "Iteration limit reached.",
                "The problem appears infeasible, as the phase one auxiliary "
                "problem terminated successfully with a residual of {0:.1e}, "
                "greater than the tolerance {1} required for the solution to "
                "be considered feasible. Consider increasing the tolerance to "
                "be greater than {0:.1e}. If this tolerance is unacceptably "
                "large, the problem is likely infeasible.",
                "The problem is unbounded, as the simplex algorithm found "
                "a basic feasible solution from which there is a direction "
                "with negative reduced cost in which all decision variables "
                "increase.",
                "Numerical difficulties encountered; consider trying "
                "method='interior-point'.",
                "Problems with no constraints are trivially solved; please "
                "turn presolve on.",
                "The guess x0 cannot be converted to a basic feasible "
                "solution. "
                ]

    if A.size == 0:  # address test_unbounded_below_no_presolve_corrected
        return np.zeros(c.shape), 5, messages[5], 0

    x, basis, A, b, residual, status, iteration = (
        _phase_one(A, b, x0, callback, postsolve_args,
                   maxiter, tol, disp, maxupdate, mast, pivot))

    if status == 0:
        x, basis, status, iteration = _phase_two(c, A, x, basis, callback,
                                                 postsolve_args,
                                                 maxiter, tol, disp,
                                                 maxupdate, mast, pivot,
                                                 iteration)

    return x, status, messages[status].format(residual, tol), iteration
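
A quick usage sketch for the solver above. Note that ``method='revised simplex'`` was deprecated in SciPy 1.9 and removed in 1.11, so this assumes one of the releases (roughly 1.3 through 1.10) that still ships it:

import numpy as np
from scipy.optimize import linprog

c = np.array([1.0, 1.0])
A_eq = np.array([[1.0, 2.0]])
b_eq = np.array([4.0])

res = linprog(c, A_eq=A_eq, b_eq=b_eq, method="revised simplex")
print(res.status, res.x)   # expected: 0 [0. 2.]
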
663
venv/lib/python3.13/site-packages/scipy/optimize/_linprog_simplex.py
Normal file
@@ -0,0 +1,663 @@
"""Simplex method for linear programming
|
||||
|
||||
The *simplex* method uses a traditional, full-tableau implementation of
|
||||
Dantzig's simplex algorithm [1]_, [2]_ (*not* the Nelder-Mead simplex).
|
||||
This algorithm is included for backwards compatibility and educational
|
||||
purposes.
|
||||
|
||||
.. versionadded:: 0.15.0
|
||||
|
||||
Warnings
|
||||
--------
|
||||
|
||||
The simplex method may encounter numerical difficulties when pivot
|
||||
values are close to the specified tolerance. If encountered try
|
||||
remove any redundant constraints, change the pivot strategy to Bland's
|
||||
rule or increase the tolerance value.
|
||||
|
||||
Alternatively, more robust methods maybe be used. See
|
||||
:ref:`'interior-point' <optimize.linprog-interior-point>` and
|
||||
:ref:`'revised simplex' <optimize.linprog-revised_simplex>`.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Dantzig, George B., Linear programming and extensions. Rand
|
||||
Corporation Research Study Princeton Univ. Press, Princeton, NJ,
|
||||
1963
|
||||
.. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
|
||||
Mathematical Programming", McGraw-Hill, Chapter 4.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from warnings import warn
|
||||
from ._optimize import OptimizeResult, OptimizeWarning, _check_unknown_options
|
||||
from ._linprog_util import _postsolve
|
||||
|
||||
|
||||
def _pivot_col(T, tol=1e-9, bland=False):
    """
    Given a linear programming simplex tableau, determine the column
    of the variable to enter the basis.

    Parameters
    ----------
    T : 2-D array
        A 2-D array representing the simplex tableau, T, corresponding to the
        linear programming problem. It should have the form:

        [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
         [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
         .
         .
         .
         [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
         [c[0], c[1], ..., c[n_total], 0]]

        for a Phase 2 problem, or the form:

        [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
         [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
         .
         .
         .
         [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
         [c[0], c[1], ..., c[n_total], 0],
         [c'[0], c'[1], ..., c'[n_total], 0]]

        for a Phase 1 problem (a problem in which a basic feasible solution
        is sought prior to maximizing the actual objective). ``T`` is modified
        in place by ``_solve_simplex``.
    tol : float
        Elements in the objective row larger than -tol will not be considered
        for pivoting. Nominally this value is zero, but numerical issues
        cause a tolerance about zero to be necessary.
    bland : bool
        If True, use Bland's rule for selection of the column (select the
        first column with a negative coefficient in the objective row,
        regardless of magnitude).

    Returns
    -------
    status: bool
        True if a suitable pivot column was found, otherwise False.
        A return of False indicates that the linear programming simplex
        algorithm is complete.
    col: int
        The index of the column of the pivot element.
        If status is False, col will be returned as nan.
    """
    ma = np.ma.masked_where(T[-1, :-1] >= -tol, T[-1, :-1], copy=False)
    if ma.count() == 0:
        return False, np.nan
    if bland:
        # ma.mask is sometimes 0d
        return True, np.nonzero(np.logical_not(np.atleast_1d(ma.mask)))[0][0]
    return True, np.ma.nonzero(ma == ma.min())[0][0]


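# Illustrative aside (not part of the scipy source): column selection on a
# tiny Phase-2 tableau. The objective row is [-1, -2, 0, 0]; masking entries
# >= -tol leaves the negative costs, and the minimum sits in column 1.
#
# >>> import numpy as np
# >>> T = np.array([[1., 1., 1., 0., 4.],
# ...               [1., 3., 0., 1., 6.],
# ...               [-1., -2., 0., 0., 0.]])
# >>> tol = 1e-9
# >>> ma = np.ma.masked_where(T[-1, :-1] >= -tol, T[-1, :-1], copy=False)
# >>> int(np.ma.nonzero(ma == ma.min())[0][0])
# 1

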
def _pivot_row(T, basis, pivcol, phase, tol=1e-9, bland=False):
    """
    Given a linear programming simplex tableau, determine the row for the
    pivot operation.

    Parameters
    ----------
    T : 2-D array
        A 2-D array representing the simplex tableau, T, corresponding to the
        linear programming problem. It should have the form:

        [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
         [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
         .
         .
         .
         [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
         [c[0], c[1], ..., c[n_total], 0]]

        for a Phase 2 problem, or the form:

        [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
         [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
         .
         .
         .
         [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
         [c[0], c[1], ..., c[n_total], 0],
         [c'[0], c'[1], ..., c'[n_total], 0]]

        for a Phase 1 problem (a problem in which a basic feasible solution
        is sought prior to maximizing the actual objective). ``T`` is modified
        in place by ``_solve_simplex``.
    basis : array
        A list of the current basic variables.
    pivcol : int
        The index of the pivot column.
    phase : int
        The phase of the simplex algorithm (1 or 2).
    tol : float
        Elements in the pivot column smaller than tol will not be considered
        for pivoting. Nominally this value is zero, but numerical issues
        cause a tolerance about zero to be necessary.
    bland : bool
        If True, use Bland's rule for selection of the row (if more than one
        row can be used, choose the one with the lowest variable index).

    Returns
    -------
    status: bool
        True if a suitable pivot row was found, otherwise False. A return
        of False indicates that the linear programming problem is unbounded.
    row: int
        The index of the row of the pivot element. If status is False, row
        will be returned as nan.
    """
    if phase == 1:
        k = 2
    else:
        k = 1
    ma = np.ma.masked_where(T[:-k, pivcol] <= tol, T[:-k, pivcol], copy=False)
    if ma.count() == 0:
        return False, np.nan
    mb = np.ma.masked_where(T[:-k, pivcol] <= tol, T[:-k, -1], copy=False)
    q = mb / ma
    min_rows = np.ma.nonzero(q == q.min())[0]
    if bland:
        return True, min_rows[np.argmin(np.take(basis, min_rows))]
    return True, min_rows[0]


def _apply_pivot(T, basis, pivrow, pivcol, tol=1e-9):
    """
    Pivot the simplex tableau in place on the element given by
    (pivrow, pivcol). The entering variable corresponds to the column given
    by pivcol, forcing the variable basis[pivrow] to leave the basis.

    Parameters
    ----------
    T : 2-D array
        A 2-D array representing the simplex tableau, T, corresponding to the
        linear programming problem. It should have the form:

        [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
         [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
         .
         .
         .
         [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
         [c[0], c[1], ..., c[n_total], 0]]

        for a Phase 2 problem, or the form:

        [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
         [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
         .
         .
         .
         [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
         [c[0], c[1], ..., c[n_total], 0],
         [c'[0], c'[1], ..., c'[n_total], 0]]

        for a Phase 1 problem (a problem in which a basic feasible solution
        is sought prior to maximizing the actual objective). ``T`` is modified
        in place by ``_solve_simplex``.
    basis : 1-D array
        An array of the indices of the basic variables, such that basis[i]
        contains the column corresponding to the basic variable for row i.
        Basis is modified in place by _apply_pivot.
    pivrow : int
        Row index of the pivot.
    pivcol : int
        Column index of the pivot.
    """
    basis[pivrow] = pivcol
    pivval = T[pivrow, pivcol]
    T[pivrow] = T[pivrow] / pivval
    for irow in range(T.shape[0]):
        if irow != pivrow:
            T[irow] = T[irow] - T[pivrow] * T[irow, pivcol]

    # The selected pivot should never lead to a pivot value less than the tol.
    if np.isclose(pivval, tol, atol=0, rtol=1e4):
        message = (
            f"The pivot operation produces a pivot value of {pivval: .1e}, "
            "which is only slightly greater than the specified "
            f"tolerance {tol: .1e}. This may lead to issues regarding the "
            "numerical stability of the simplex method. "
            "Removing redundant constraints, changing the pivot strategy "
            "via Bland's rule or increasing the tolerance may "
            "help reduce the issue.")
        warn(message, OptimizeWarning, stacklevel=5)


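# Illustrative aside (not part of the scipy source): one Gauss-Jordan pivot
# as performed above, on a 2x3 block with pivot element T[0, 0] = 2. The
# pivot row is scaled so the pivot becomes 1, then eliminated from the other
# rows.
#
# >>> import numpy as np
# >>> T = np.array([[2., 4., 6.],
# ...               [1., 1., 1.]])
# >>> pivrow, pivcol = 0, 0
# >>> T[pivrow] = T[pivrow] / T[pivrow, pivcol]
# >>> for irow in range(T.shape[0]):
# ...     if irow != pivrow:
# ...         T[irow] = T[irow] - T[pivrow] * T[irow, pivcol]
# >>> T
# array([[ 1.,  2.,  3.],
#        [ 0., -1., -2.]])

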
def _solve_simplex(T, n, basis, callback, postsolve_args,
                   maxiter=1000, tol=1e-9, phase=2, bland=False, nit0=0,
                   ):
    """
    Solve a linear programming problem in "standard form" using the Simplex
    Method. Linear Programming is intended to solve the following problem
    form:

    Minimize::

        c @ x

    Subject to::

        A @ x == b
        x >= 0

    Parameters
    ----------
    T : 2-D array
        A 2-D array representing the simplex tableau, T, corresponding to the
        linear programming problem. It should have the form:

        [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
         [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
         .
         .
         .
         [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
         [c[0], c[1], ..., c[n_total], 0]]

        for a Phase 2 problem, or the form:

        [[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
         [A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
         .
         .
         .
         [A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
         [c[0], c[1], ..., c[n_total], 0],
         [c'[0], c'[1], ..., c'[n_total], 0]]

        for a Phase 1 problem (a problem in which a basic feasible solution
        is sought prior to maximizing the actual objective). ``T`` is modified
        in place by ``_solve_simplex``.
    n : int
        The number of true variables in the problem.
    basis : 1-D array
        An array of the indices of the basic variables, such that basis[i]
        contains the column corresponding to the basic variable for row i.
        Basis is modified in place by _solve_simplex.
    callback : callable, optional
        If a callback function is provided, it will be called within each
        iteration of the algorithm. The callback must accept a
        `scipy.optimize.OptimizeResult` consisting of the following fields:

            x : 1-D array
                Current solution vector
            fun : float
                Current value of the objective function
            success : bool
                True only when a phase has completed successfully. This
                will be False for most iterations.
            slack : 1-D array
                The values of the slack variables. Each slack variable
                corresponds to an inequality constraint. If the slack is
                zero, the corresponding constraint is active.
            con : 1-D array
                The (nominally zero) residuals of the equality constraints,
                that is, ``b - A_eq @ x``
            phase : int
                The phase of the optimization being executed. In phase 1 a
                basic feasible solution is sought and the tableau ``T`` has
                an additional row representing an alternate objective
                function.
            status : int
                An integer representing the exit status of the optimization::

                     0 : Optimization terminated successfully
                     1 : Iteration limit reached
                     2 : Problem appears to be infeasible
                     3 : Problem appears to be unbounded
                     4 : Serious numerical difficulties encountered

            nit : int
                The number of iterations performed.
            message : str
                A string descriptor of the exit status of the optimization.
    postsolve_args : tuple
        Data needed by _postsolve to convert the solution to the standard-form
        problem into the solution to the original problem.
    maxiter : int
        The maximum number of iterations to perform before aborting the
        optimization.
    tol : float
        The tolerance which determines when a solution is "close enough" to
        zero in Phase 1 to be considered a basic feasible solution or close
        enough to positive to serve as an optimal solution.
    phase : int
        The phase of the optimization being executed. In phase 1 a basic
        feasible solution is sought and the tableau ``T`` has an additional
        row representing an alternate objective function.
    bland : bool
        If True, choose pivots using Bland's rule [3]_. In problems which
        fail to converge due to cycling, using Bland's rule can provide
        convergence at the expense of a less optimal path about the simplex.
    nit0 : int
        The initial iteration number used to keep an accurate iteration total
        in a two-phase problem.

    Returns
    -------
    nit : int
        The number of iterations. Used to keep an accurate iteration total
        in the two-phase problem.
    status : int
        An integer representing the exit status of the optimization::

             0 : Optimization terminated successfully
             1 : Iteration limit reached
             2 : Problem appears to be infeasible
             3 : Problem appears to be unbounded
             4 : Serious numerical difficulties encountered

    """
    nit = nit0
    status = 0
    message = ''
    complete = False

    if phase == 1:
        m = T.shape[1]-2
    elif phase == 2:
        m = T.shape[1]-1
    else:
        raise ValueError("Argument 'phase' to _solve_simplex must be 1 or 2")

    if phase == 2:
        # Check if any artificial variables are still in the basis.
        # If yes, check if any coefficients from this row and a column
        # corresponding to one of the non-artificial variables is non-zero.
        # If found, pivot at this term. If not, start phase 2.
        # Do this for all artificial variables in the basis.
        # Ref: "An Introduction to Linear Programming and Game Theory"
        # by Paul R. Thie, Gerard E. Keough, 3rd Ed,
        # Chapter 3.7 Redundant Systems (p. 102)
        for pivrow in [row for row in range(basis.size)
                       if basis[row] > T.shape[1] - 2]:
            non_zero_row = [col for col in range(T.shape[1] - 1)
                            if abs(T[pivrow, col]) > tol]
            if len(non_zero_row) > 0:
                pivcol = non_zero_row[0]
                _apply_pivot(T, basis, pivrow, pivcol, tol)
                nit += 1

    if len(basis[:m]) == 0:
        solution = np.empty(T.shape[1] - 1, dtype=np.float64)
    else:
        solution = np.empty(max(T.shape[1] - 1, max(basis[:m]) + 1),
                            dtype=np.float64)

    while not complete:
        # Find the pivot column
        pivcol_found, pivcol = _pivot_col(T, tol, bland)
        if not pivcol_found:
            pivcol = np.nan
            pivrow = np.nan
            status = 0
            complete = True
        else:
            # Find the pivot row
            pivrow_found, pivrow = _pivot_row(T, basis, pivcol, phase, tol,
                                              bland)
            if not pivrow_found:
                status = 3
                complete = True

        if callback is not None:
            solution[:] = 0
            solution[basis[:n]] = T[:n, -1]
            x = solution[:m]
            x, fun, slack, con = _postsolve(
                x, postsolve_args
            )
            res = OptimizeResult({
                'x': x,
                'fun': fun,
                'slack': slack,
                'con': con,
                'status': status,
                'message': message,
                'nit': nit,
                'success': status == 0 and complete,
                'phase': phase,
                'complete': complete,
            })
            callback(res)

        if not complete:
            if nit >= maxiter:
                # Iteration limit exceeded
                status = 1
                complete = True
            else:
                _apply_pivot(T, basis, pivrow, pivcol, tol)
                nit += 1
    return nit, status


def _linprog_simplex(c, c0, A, b, callback, postsolve_args,
                     maxiter=1000, tol=1e-9, disp=False, bland=False,
                     **unknown_options):
    """
    Minimize a linear objective function subject to linear equality and
    non-negativity constraints using the two phase simplex method.
    Linear programming is intended to solve problems of the following form:

    Minimize::

        c @ x

    Subject to::

        A @ x == b
        x >= 0

    User-facing documentation is in _linprog_doc.py.

    Parameters
    ----------
    c : 1-D array
        Coefficients of the linear objective function to be minimized.
    c0 : float
        Constant term in objective function due to fixed (and eliminated)
        variables. (Purely for display.)
    A : 2-D array
        2-D array such that ``A @ x`` gives the values of the equality
        constraints at ``x``.
    b : 1-D array
        1-D array of values representing the right hand side of each equality
        constraint (row) in ``A``.
    callback : callable, optional
        If a callback function is provided, it will be called within each
        iteration of the algorithm. The callback function must accept a single
        `scipy.optimize.OptimizeResult` consisting of the following fields:

            x : 1-D array
                Current solution vector
            fun : float
                Current value of the objective function
            success : bool
                True when an algorithm has completed successfully.
            slack : 1-D array
                The values of the slack variables. Each slack variable
                corresponds to an inequality constraint. If the slack is
                zero, the corresponding constraint is active.
            con : 1-D array
                The (nominally zero) residuals of the equality constraints,
                that is, ``b - A_eq @ x``
            phase : int
                The phase of the algorithm being executed.
            status : int
                An integer representing the status of the optimization::

                     0 : Algorithm proceeding nominally
                     1 : Iteration limit reached
                     2 : Problem appears to be infeasible
                     3 : Problem appears to be unbounded
                     4 : Serious numerical difficulties encountered
            nit : int
                The number of iterations performed.
            message : str
                A string descriptor of the exit status of the optimization.
    postsolve_args : tuple
        Data needed by _postsolve to convert the solution to the standard-form
        problem into the solution to the original problem.

    Options
    -------
    maxiter : int
        The maximum number of iterations to perform.
    disp : bool
        If True, print exit status message to sys.stdout
    tol : float
        The tolerance which determines when a solution is "close enough" to
        zero in Phase 1 to be considered a basic feasible solution or close
        enough to positive to serve as an optimal solution.
    bland : bool
        If True, use Bland's anti-cycling rule [3]_ to choose pivots to
        prevent cycling. If False, choose pivots which should lead to a
        converged solution more quickly. The latter method is subject to
        cycling (non-convergence) in rare instances.
    unknown_options : dict
        Optional arguments not used by this particular solver. If
        `unknown_options` is non-empty a warning is issued listing all
        unused options.

    Returns
    -------
    x : 1-D array
        Solution vector.
    status : int
        An integer representing the exit status of the optimization::

             0 : Optimization terminated successfully
             1 : Iteration limit reached
             2 : Problem appears to be infeasible
             3 : Problem appears to be unbounded
             4 : Serious numerical difficulties encountered

    message : str
        A string descriptor of the exit status of the optimization.
    iteration : int
        The number of iterations taken to solve the problem.

    References
    ----------
    .. [1] Dantzig, George B., Linear programming and extensions. Rand
           Corporation Research Study Princeton Univ. Press, Princeton, NJ,
           1963
    .. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
           Mathematical Programming", McGraw-Hill, Chapter 4.
    .. [3] Bland, Robert G. New finite pivoting rules for the simplex method.
           Mathematics of Operations Research (2), 1977: pp. 103-107.


    Notes
    -----
    The expected problem formulation differs between the top level ``linprog``
    module and the method specific solvers. The method specific solvers
    expect a problem in standard form:

    Minimize::

        c @ x

    Subject to::

        A @ x == b
        x >= 0

    Whereas the top level ``linprog`` module expects a problem of form:

    Minimize::

        c @ x

    Subject to::

        A_ub @ x <= b_ub
        A_eq @ x == b_eq
        lb <= x <= ub

    where ``lb = 0`` and ``ub = None`` unless set in ``bounds``.

    The original problem contains equality, upper-bound and variable
    constraints whereas the method specific solver requires equality
    constraints and variable non-negativity.

    The ``linprog`` module converts the original problem to standard form by
    converting the simple bounds to upper bound constraints, introducing
    non-negative slack variables for inequality constraints, and expressing
    unbounded variables as the difference between two non-negative variables.
    """
    _check_unknown_options(unknown_options)

    status = 0
    messages = {0: "Optimization terminated successfully.",
                1: "Iteration limit reached.",
                2: "Optimization failed. Unable to find a feasible"
                   " starting point.",
                3: "Optimization failed. The problem appears to be unbounded.",
                4: "Optimization failed. Singular matrix encountered."}

    n, m = A.shape

    # All constraints must have b >= 0.
    is_negative_constraint = np.less(b, 0)
    A[is_negative_constraint] *= -1
    b[is_negative_constraint] *= -1

    # As all constraints are equality constraints the artificial variables
    # will also be basic variables.
    av = np.arange(n) + m
    basis = av.copy()

    # Format the phase one tableau by adding artificial variables and stacking
    # the constraints, the objective row and pseudo-objective row.
    row_constraints = np.hstack((A, np.eye(n), b[:, np.newaxis]))
    row_objective = np.hstack((c, np.zeros(n), c0))
    row_pseudo_objective = -row_constraints.sum(axis=0)
    row_pseudo_objective[av] = 0
    T = np.vstack((row_constraints, row_objective, row_pseudo_objective))

    nit1, status = _solve_simplex(T, n, basis, callback=callback,
                                  postsolve_args=postsolve_args,
                                  maxiter=maxiter, tol=tol, phase=1,
                                  bland=bland
                                  )
    # if pseudo objective is zero, remove the last row from the tableau and
    # proceed to phase 2
    nit2 = nit1
    if abs(T[-1, -1]) < tol:
        # Remove the pseudo-objective row from the tableau
        T = T[:-1, :]
        # Remove the artificial variable columns from the tableau
        T = np.delete(T, av, 1)
    else:
        # Failure to find a feasible starting point
        status = 2
        messages[status] = (
            "Phase 1 of the simplex method failed to find a feasible "
            "solution. The pseudo-objective function evaluates to "
            f"{abs(T[-1, -1]):.1e} "
            f"which exceeds the required tolerance of {tol} for a solution "
            "to be considered 'close enough' to zero to be a basic solution. "
            "Consider increasing the tolerance to be greater than "
            f"{abs(T[-1, -1]):.1e}. "
            "If this tolerance is unacceptably large the problem may be "
            "infeasible."
        )

    if status == 0:
        # Phase 2
        nit2, status = _solve_simplex(T, n, basis, callback=callback,
                                      postsolve_args=postsolve_args,
                                      maxiter=maxiter, tol=tol, phase=2,
                                      bland=bland, nit0=nit1
                                      )

    solution = np.zeros(n + m)
    solution[basis[:n]] = T[:n, -1]
    x = solution[:m]

    return x, status, messages[status], int(nit2)
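
To make the Notes above concrete, here is a small sketch (illustrative numbers, independent of scipy internals) of the standard-form conversion: each inequality in ``A_ub @ x <= b_ub`` gains a non-negative slack variable, so the solver sees only equalities:

import numpy as np

A_ub = np.array([[1.0, 1.0],
                 [1.0, 3.0]])
b_ub = np.array([4.0, 6.0])
c = np.array([-1.0, -2.0])

m, n = A_ub.shape
A = np.hstack((A_ub, np.eye(m)))          # [A_ub | I] @ [x; s] == b_ub
c_std = np.concatenate((c, np.zeros(m)))  # slacks carry zero cost
print(A)
print(c_std)
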
1521
venv/lib/python3.13/site-packages/scipy/optimize/_linprog_util.py
Normal file
File diff suppressed because it is too large
5
venv/lib/python3.13/site-packages/scipy/optimize/_lsq/__init__.py
Normal file
@@ -0,0 +1,5 @@
"""This module contains least-squares algorithms."""
|
||||
from .least_squares import least_squares
|
||||
from .lsq_linear import lsq_linear
|
||||
|
||||
__all__ = ['least_squares', 'lsq_linear']
|
||||
Some files were not shown because too many files have changed in this diff.