Skip to content

Commit e4e4049

Browse files
committed
First implementation of convex constraints
1 parent 3a3bd50 commit e4e4049

File tree

5 files changed

+196
-24
lines changed

5 files changed

+196
-24
lines changed

pybobyqa/controller.py

+23-9
Original file line numberDiff line numberDiff line change
@@ -94,11 +94,11 @@ def able_to_do_restart(self):
9494

9595

9696
class Controller(object):
97-
def __init__(self, objfun, x0, args, f0, f0_nsamples, xl, xu, npt, rhobeg, rhoend, nf, nx, maxfun, params, scaling_changes, do_logging=True):
97+
def __init__(self, objfun, x0, args, f0, f0_nsamples, xl, xu, projections, npt, rhobeg, rhoend, nf, nx, maxfun, params, scaling_changes, do_logging=True):
9898
self.objfun = objfun
9999
self.maxfun = maxfun
100100
self.args = args
101-
self.model = Model(npt, x0, f0, xl, xu, f0_nsamples, abs_tol=params("model.abs_tol"),
101+
self.model = Model(npt, x0, f0, xl, xu, projections, f0_nsamples, abs_tol=params("model.abs_tol"),
102102
precondition=params("interpolation.precondition"), do_logging=do_logging)
103103
self.nf = nf
104104
self.nx = nx
@@ -271,15 +271,24 @@ def initialise_random_directions(self, number_of_samples, num_directions, params
271271
def trust_region_step(self):
272272
# Build model for full least squares objectives
273273
gopt, H = self.model.build_full_model()
274-
try:
275-
d, gnew, crvmin = trsbox(self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.delta)
276-
except ValueError:
277-
# A ValueError may be raised if gopt or H have nan/inf values (issue #14)
278-
# Although this should be picked up earlier, in this situation just return a zero
279-
# trust-region step, which leads to a safety step being called in the main algorithm.
274+
if np.any(np.isinf(gopt)) or np.any(np.isnan(gopt)) or np.any(np.isinf(H)) or np.any(np.isnan(H)):
275+
# Skip computing the step if gopt or H are badly formed
280276
d = np.zeros(gopt.shape)
281277
gnew = gopt.copy()
282278
crvmin = 0.0 # this usually represents 'step on trust-region boundary' but seems to be a sensible default for errors
279+
else:
280+
try:
281+
if self.model.projections is None:
282+
d, gnew, crvmin = trsbox(self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.delta)
283+
else:
284+
d, gnew, crvmin = ctrsbox(self.model.xbase, self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.model.projections, self.delta)
285+
except ValueError:
286+
# A ValueError may be raised if gopt or H have nan/inf values (issue #14)
287+
# Although this should be picked up earlier, in this situation just return a zero
288+
# trust-region step, which leads to a safety step being called in the main algorithm.
289+
d = np.zeros(gopt.shape)
290+
gnew = gopt.copy()
291+
crvmin = 0.0 # this usually represents 'step on trust-region boundary' but seems to be a sensible default for errors
283292
return d, gopt, H, gnew, crvmin
284293

285294
def geometry_step(self, knew, adelt, number_of_samples, params):
@@ -293,7 +302,12 @@ def geometry_step(self, knew, adelt, number_of_samples, params):
293302

294303
# Solve problem: bounds are sl <= xnew <= su, and ||xnew-xopt|| <= adelt
295304
try:
296-
xnew = trsbox_geometry(self.model.xopt(), c, g, H, self.model.sl, self.model.su, adelt)
305+
if np.any(np.isinf(g)) or np.any(np.isnan(g)) or np.any(np.isinf(H)) or np.any(np.isnan(H)):
306+
raise ValueError
307+
if self.model.projections is None:
308+
xnew = trsbox_geometry(self.model.xopt(), c, g, H, self.model.sl, self.model.su, adelt)
309+
else:
310+
xnew = ctrsbox_geometry(self.model.xbase, self.model.xopt(), c, g, H, self.model.sl, self.model.su, self.model.projections, adelt)
297311
except ValueError:
298312
# A ValueError may be raised if gopt or H have nan/inf values (issue #23)
299313
# Ideally this should be picked up earlier in self.model.lagrange_polynomial(...)

pybobyqa/model.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636

3737
from .hessian import to_upper_triangular_vector
3838
from .trust_region import trsbox_geometry
39-
from .util import sumsq, model_value
39+
from .util import sumsq, model_value, dykstra
4040

4141
__all__ = ['Model']
4242

@@ -45,7 +45,7 @@
4545

4646

4747
class Model(object):
48-
def __init__(self, npt, x0, f0, xl, xu, f0_nsamples, n=None, abs_tol=-1e20, precondition=True, do_logging=True):
48+
def __init__(self, npt, x0, f0, xl, xu, projections, f0_nsamples, n=None, abs_tol=-1e20, precondition=True, do_logging=True):
4949
if n is None:
5050
n = len(x0)
5151
assert npt >= n + 1, "Require npt >= n+1 for quadratic models"
@@ -62,6 +62,7 @@ def __init__(self, npt, x0, f0, xl, xu, f0_nsamples, n=None, abs_tol=-1e20, prec
6262
self.xbase = x0.copy()
6363
self.sl = xl - self.xbase # lower bound w.r.t. xbase (require xpt >= sl)
6464
self.su = xu - self.xbase # upper bound w.r.t. xbase (require xpt <= su)
65+
self.projections = projections # list of projection-based constraints (excluding explicit bounds)
6566
self.points = np.zeros((npt, n)) # interpolation points w.r.t. xbase
6667

6768
# Function values

pybobyqa/solver.py

+16-9
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def __str__(self):
9696
return output
9797

9898

99-
def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_far, nf_so_far, nx_so_far, nsamples, params,
99+
def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns_so_far, nf_so_far, nx_so_far, nsamples, params,
100100
diagnostic_info, scaling_changes, f0_avg_old=None, f0_nsamples_old=None, do_logging=True, print_progress=False):
101101
# Evaluate at x0 (keep nf, nx correct and check for f small)
102102
if f0_avg_old is None:
@@ -144,7 +144,7 @@ def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_f
144144
nx = nx_so_far
145145

146146
# Initialise controller
147-
control = Controller(objfun, x0, args, f0_avg, num_samples_run, xl, xu, npt, rhobeg, rhoend, nf, nx, maxfun, params, scaling_changes, do_logging=do_logging)
147+
control = Controller(objfun, x0, args, f0_avg, num_samples_run, xl, xu, projections, npt, rhobeg, rhoend, nf, nx, maxfun, params, scaling_changes, do_logging=do_logging)
148148

149149
# Initialise interpolation set
150150
number_of_samples = max(nsamples(control.delta, control.rho, 0, nruns_so_far), 1)
@@ -665,7 +665,7 @@ def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_f
665665
return x, f, gradmin, hessmin, nsamples, control.nf, control.nx, nruns_so_far, exit_info, diagnostic_info
666666

667667

668-
def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8, maxfun=None, nsamples=None, user_params=None,
668+
def solve(objfun, x0, args=(), bounds=None, projections=None, npt=None, rhobeg=None, rhoend=1e-8, maxfun=None, nsamples=None, user_params=None,
669669
objfun_has_noise=False, seek_global_minimum=False, scaling_within_bounds=False, do_logging=True, print_progress=False):
670670
n = len(x0)
671671
if type(x0) == list:
@@ -694,7 +694,11 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8,
694694
if (xl is None or xu is None) and scaling_within_bounds:
695695
scaling_within_bounds = False
696696
warnings.warn("Ignoring scaling_within_bounds=True for unconstrained problem/1-sided bounds", RuntimeWarning)
697-
697+
698+
if (projections is not None) and scaling_within_bounds:
699+
scaling_within_bounds = False
700+
warnings.warn("Ignoring scaling_within_bounds=True for problems with projections given", RuntimeWarning)
701+
698702
exit_info = None
699703
if seek_global_minimum and (xl is None or xu is None):
700704
exit_info = ExitInformation(EXIT_INPUT_ERROR, "If seeking global minimum, must specify upper and lower bounds")
@@ -761,6 +765,9 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8,
761765
if exit_info is None and np.min(xu - xl) < 2.0 * rhobeg:
762766
exit_info = ExitInformation(EXIT_INPUT_ERROR, "gap between lower and upper must be at least 2*rhobeg")
763767

768+
if exit_info is None and projections is not None and type(projections) != list:
769+
exit_info = ExitInformation(EXIT_INPUT_ERROR, "projections must be a list of functions")
770+
764771
if maxfun <= npt:
765772
warnings.warn("maxfun <= npt: Are you sure your budget is large enough?", RuntimeWarning)
766773

@@ -792,12 +799,12 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8,
792799
return results
793800

794801
# Enforce lower & upper bounds on x0
795-
idx = (x0 <= xl)
802+
idx = (x0 < xl)
796803
if np.any(idx):
797804
warnings.warn("x0 below lower bound, adjusting", RuntimeWarning)
798805
x0[idx] = xl[idx]
799806

800-
idx = (x0 >= xu)
807+
idx = (x0 > xu)
801808
if np.any(idx):
802809
warnings.warn("x0 above upper bound, adjusting", RuntimeWarning)
803810
x0[idx] = xu[idx]
@@ -808,7 +815,7 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8,
808815
nf = 0
809816
nx = 0
810817
xmin, fmin, gradmin, hessmin, nsamples_min, nf, nx, nruns, exit_info, diagnostic_info = \
811-
solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns, nf, nx, nsamples, params,
818+
solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns, nf, nx, nsamples, params,
812819
diagnostic_info, scaling_changes, do_logging=do_logging, print_progress=print_progress)
813820

814821
# Hard restarts loop
@@ -829,11 +836,11 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8,
829836
% (fmin, nf, _rhobeg, _rhoend))
830837
if params("restarts.hard.use_old_fk"):
831838
xmin2, fmin2, gradmin2, hessmin2, nsamples2, nf, nx, nruns, exit_info, diagnostic_info = \
832-
solve_main(objfun, xmin, args, xl, xu, npt, _rhobeg, _rhoend, maxfun, nruns, nf, nx, nsamples, params,
839+
solve_main(objfun, xmin, args, xl, xu, projections, npt, _rhobeg, _rhoend, maxfun, nruns, nf, nx, nsamples, params,
833840
diagnostic_info, scaling_changes, f0_avg_old=fmin, f0_nsamples_old=nsamples_min, do_logging=do_logging, print_progress=print_progress)
834841
else:
835842
xmin2, fmin2, gradmin2, hessmin2, nsamples2, nf, nx, nruns, exit_info, diagnostic_info = \
836-
solve_main(objfun, xmin, args, xl, xu, npt, _rhobeg, _rhoend, maxfun, nruns, nf, nx, nsamples, params,
843+
solve_main(objfun, xmin, args, xl, xu, projections, npt, _rhobeg, _rhoend, maxfun, nruns, nf, nx, nsamples, params,
837844
diagnostic_info, scaling_changes, do_logging=do_logging, print_progress=print_progress)
838845

839846
if fmin2 < fmin or np.isnan(fmin):

pybobyqa/trust_region.py

+106-3
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,21 @@
99
s.t. ||d|| <= delta
1010
sl <= xopt + d <= su
1111
The other outputs: gnew is the gradient of the model at d, and crvmin has
12-
information about the curvature of the model at the solution.
12+
information about the curvature of the model at the solution:
13+
- If ||d||=delta, then crvmin = 0
14+
- If d is constrained in all directions by the box constraints, then crvmin = -1
15+
- Otherwise, crvmin > 0 is the smallest positive curvature seen in the Hessian, min_{k} (dk^T H dk / ||dk||^2) for iterates dk
16+
17+
For problems with general convex constraints, we have an alternative function
18+
d, gnew, crvmin = ctrsbox(xbase, xopt, g, H, sl, su, projections, delta)
19+
which solves the subproblem
20+
min_{d} g'*d + 0.5*d'*H*d
21+
s.t. ||d|| <= delta
22+
sl <= xopt + d <= su
23+
xbase + xopt + d in the intersection of all convex sets C with proj_{C} in the list 'projections'
24+
The outputs are the same as trsbox, but without the negative case for crvmin.
25+
This version is solved using projected gradient descent; see Chapter 10 of
26+
A. Beck, First-Order Methods in Optimization. SIAM, 2017.
1327
1428
Notes
1529
----
@@ -53,10 +67,10 @@
5367
USE_FORTRAN = False
5468

5569

56-
from .util import sumsq, model_value
70+
from .util import sumsq, model_value, dykstra, pball, pbox
5771

5872

59-
__all__ = ['trsbox', 'trsbox_geometry']
73+
__all__ = ['trsbox', 'trsbox_geometry', 'ctrsbox', 'ctrsbox_geometry']
6074

6175
ZERO_THRESH = 1e-14
6276

@@ -400,3 +414,92 @@ def trsbox_geometry(xbase, c, g, H, lower, upper, Delta, use_fortran=USE_FORTRAN
400414
return xbase + smin
401415
else:
402416
return xbase + smax
417+
418+
419+
def ctrsbox(xbase, xopt, g, H, sl, su, projections, delta, dykstra_max_iters=100, dykstra_tol=1e-10, gtol=1e-8):
420+
n = xopt.size
421+
assert xopt.shape == (n,), "xopt has wrong shape (should be vector)"
422+
assert xbase.shape == (n,), "xbase and xopt has wrong shape (should be vector)"
423+
assert g.shape == (n,), "g and xopt have incompatible sizes"
424+
assert len(H.shape) == 2, "H must be a matrix"
425+
assert H.shape == (n, n), "H and xopt have incompatible sizes"
426+
if not np.allclose(H, H.T):
427+
# Enforce symmetry
428+
H = 0.5 * (H + H.T)
429+
warnings.warn("Trust-region solver: fixing non-symmetric Hessian", RuntimeWarning)
430+
assert sl.shape == (n,), "sl and xopt have incompatible sizes"
431+
assert su.shape == (n,), "su and xopt have incompatible sizes"
432+
assert delta > 0.0, "delta must be strictly positive"
433+
434+
# Get full list of required projections (i.e. including TR constraint and box constraints)
435+
proj_tr = lambda w: pball(w, xbase + xopt, delta)
436+
proj_box = lambda w: pbox(w, xbase + sl, xbase + su)
437+
P = list(projections) # make a copy of the projections list
438+
P.append(proj_tr)
439+
P.append(proj_box)
440+
441+
# Compute the joint projection into all constraints, but using increments from xopt
442+
def proj(d0):
443+
p = dykstra(P, xbase + xopt + d0, max_iter=dykstra_max_iters, tol=dykstra_tol)
444+
# we want the step only, so we subtract xbase+xopt
445+
# from the new point: proj(xk+d) - xk
446+
return p - xopt - xbase
447+
448+
# Compute Lipschitz constant of quadratic objective, L >= ||H||_2
449+
# Stepsize for projected GD will be Lk=L
450+
L = np.linalg.norm(H) # use L = ||H||_F >= ||H|_2 as valid L-smooth constant
451+
L = max(L, delta / np.linalg.norm(g)) # another upper bound; if L~0 then will take steps (1/L)*g, and don't want to go beyond trust-region
452+
453+
MAX_LOOP_ITERS = 100 * n ** 2
454+
455+
d = np.zeros((n,))
456+
gnew = g.copy()
457+
crvmin = -1.0
458+
459+
# projected GD loop
460+
for ii in range(MAX_LOOP_ITERS):
461+
s = proj(d - (1 / L) * gnew) - d # new step is dnew = d + s
462+
463+
# take the step
464+
d += s
465+
gnew += H.dot(s)
466+
467+
# update CRVMIN
468+
crv = np.dot(H.dot(s), s) / sumsq(s) if sumsq(s) >= ZERO_THRESH else crvmin
469+
crvmin = min(crvmin, crv) if crvmin != -1.0 else crv
470+
471+
# exit condition
472+
if np.linalg.norm(s) <= gtol:
473+
break
474+
475+
# Lastly, ensure the explicit bounds are hard-enforced
476+
if np.linalg.norm(d) > delta:
477+
d *= delta / np.linalg.norm(d)
478+
d = np.maximum(np.minimum(xopt + d, su), sl) - xopt # does not increase ||d||
479+
gnew = g + H.dot(d)
480+
481+
if np.linalg.norm(d) >= delta - ZERO_THRESH:
482+
crvmin = 0.0
483+
return d, gnew, crvmin
484+
485+
486+
def ctrsbox_geometry(xbase, xopt, c, g, H, lower, upper, projections, Delta, dykstra_max_iters=100, dykstra_tol=1e-10, gtol=1e-8):
487+
# Given a Lagrange polynomial defined by: L(x) = c + g' * (x - xbase) + 0.5*(x-xbase)*H*(x-xbase)
488+
# Maximise |L(x)| in the feasible region + trust region - that is, solve:
489+
# max_x abs(c + g' * (x - xopt) + 0.5*(x-xopt)*H*(x-xopt))
490+
# s.t. lower <= x <= upper
491+
# ||x-xopt|| <= Delta
492+
# xbase+x in the intersection of all convex sets C with proj_{C} in the list 'projections'
493+
# Setting s = x-xbase (or x = xopt + s), this is equivalent to:
494+
# max_s abs(c + g' * s + 0.5*s*H*s)
495+
# s.t. lower <= xopt + s <= upper
496+
# ||s|| <= Delta
497+
# xbase + xopt + s in the intersection of all convex sets C with proj_{C} in the list 'projections'
498+
smin, gmin, crvmin = ctrsbox(xbase, xopt, g, H, lower, upper, projections, Delta,
499+
dykstra_max_iters=dykstra_max_iters, dykstra_tol=dykstra_tol, gtol=gtol) # minimise L(x)
500+
smax, gmax, crvmax = ctrsbox(xbase, xopt, -g, -H, lower, upper, projections, Delta,
501+
dykstra_max_iters=dykstra_max_iters, dykstra_tol=dykstra_tol, gtol=gtol) # maximise L(x)
502+
if abs(c + model_value(g, H, smin)) >= abs(c + model_value(g, H, smax)): # take largest abs value
503+
return xopt + smin
504+
else:
505+
return xopt + smax

pybobyqa/util.py

+48-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@
3131

3232

3333
__all__ = ['sumsq', 'eval_objective', 'model_value', 'random_orthog_directions_within_bounds',
34-
'random_directions_within_bounds', 'apply_scaling', 'remove_scaling']
34+
'random_directions_within_bounds', 'apply_scaling', 'remove_scaling',
35+
'dykstra', 'pball', 'pbox']
3536

3637
module_logger = logging.getLogger(__name__)
3738

@@ -205,3 +206,49 @@ def remove_scaling(x_scaled, scaling_changes):
205206
return x_scaled
206207
shift, scale = scaling_changes
207208
return shift + x_scaled * scale
209+
210+
211+
def dykstra(P, x0, max_iter=100, tol=1e-10):
212+
# Dykstra's algorithm for computing the projection of x0 into the intersection
213+
# of several convex sets, each with its own projection operator.
214+
# For more details, including the robust termination condition, see
215+
# E. G. Birgin, M. Raydan. Robust Stopping Criteria for Dykstra's Algorithm.
216+
# SIAM J. Scientific Computing, 26:4 (2005), pp. 1405-1414.
217+
218+
# Here, x0 is the point to project, and P is a list of projection functions,
219+
# i.e. proj_{ith set}(x) = P[i](x)
220+
x = x0.copy()
221+
p = len(P)
222+
y = np.zeros((p, x0.shape[0]))
223+
224+
n = 0
225+
cI = float('inf')
226+
while n < max_iter and cI >= tol:
227+
cI = 0.0
228+
for i in range(p):
229+
# Update iterate
230+
prev_x = x.copy()
231+
x = P[i](prev_x - y[i,:])
232+
233+
# Update increment
234+
prev_y = y[i, :].copy()
235+
y[i, :] = x - (prev_x - prev_y)
236+
237+
# Stop condition
238+
cI += np.linalg.norm(prev_y - y[i, :])**2
239+
240+
n += 1
241+
242+
return x
243+
244+
245+
def pball(x, c, r):
246+
# Projection operator for a Euclidean ball with center c and radius r
247+
# i.e. pball(x, c, r) = proj_{B(c,r)}(x)
248+
return c + r/max(np.linalg.norm(x-c), r) * (x-c)
249+
250+
251+
def pbox(x, l, u):
252+
# Projection operator for box constraints, l <= x <= u
253+
# i.e. pbox(x, c, r) = proj_{[l,u]}(x)
254+
return np.minimum(np.maximum(x, l), u)

0 commit comments

Comments
 (0)