107 changes: 77 additions & 30 deletions xopt/numerical_optimizer.py
@@ -1,10 +1,11 @@
import warnings
from abc import ABC, abstractmethod
from typing import Optional

import torch
from botorch.acquisition import AcquisitionFunction
from botorch.optim import optimize_acqf
from pydantic import ConfigDict, Field, PositiveFloat, PositiveInt
from pydantic import ConfigDict, Field, PositiveFloat, PositiveInt, field_validator
from torch import Tensor

from xopt.pydantic import XoptBaseModel
@@ -14,13 +15,6 @@ class NumericalOptimizer(XoptBaseModel, ABC):
"""
Base class for numerical optimizers.

Attributes
----------
name : str
The name of the optimizer. Default is "base_numerical_optimizer".
model_config : ConfigDict
Configuration dictionary with extra fields forbidden.

Methods
-------
optimize(function, bounds, n_candidates=1, **kwargs)
@@ -42,47 +36,89 @@ class LBFGSOptimizer(NumericalOptimizer):
Attributes
----------
n_restarts : PositiveInt
Number of restarts during acquisition function optimization, default is 20.
Number of restarts (independent initial conditions) for optimization, default is 20.
n_raw_samples : PositiveInt
Number of raw samples used to pick the initial `n_restarts` points, default is 128.
max_iter : PositiveInt
Maximum number of iterations for the optimizer, default is 2000.
max_time : Optional[PositiveFloat]
Maximum time allowed for optimization in seconds, default is 5.0. Set to None for no time limit.
max_ls : Optional[PositiveInt]
Maximum number of line search steps, default is None (use scipy defaults).
with_grad : bool
Whether to use autograd (True, default) or finite difference (False) for gradient computation.
eps : Optional[PositiveFloat]
Step size for finite difference gradient estimation when with_grad is False. If None, use the scipy default of 1e-08.
ftol : Optional[PositiveFloat]
Convergence tolerance on the acquisition function value. If None, use scipy defaults.
pgtol : Optional[PositiveFloat]
Convergence tolerance on the projected gradient. If None, use scipy defaults.
sequential : bool
Use sequential (True) or joint (False, default) optimization when multiple candidates are requested.

Methods
-------
optimize(function, bounds, n_candidates=1, **kwargs)
Optimize the given acquisition function within the specified bounds.

Parameters
----------
function : callable
The acquisition function to be optimized.
bounds : Tensor
The bounds within which to optimize the acquisition function. Must have shape [2, ndim].
n_candidates : int, optional
Number of candidates to return, default is 1.
**kwargs : dict
Additional keyword arguments to pass to the optimizer.

Returns
-------
candidates : Tensor
The optimized candidates.
"""

name: str = Field("LBFGS", frozen=True)
n_restarts: PositiveInt = Field(
20, description="number of restarts during acquisition function optimization"
20,
description="number of restarts (independent initial conditions) for optimization",
)
n_raw_samples: Optional[PositiveInt] = Field(
128,
description="number of raw samples - `n_restarts` best ones are selected from this set",
)
max_iter: PositiveInt = Field(
2000, description="maximum number of optimization steps"
)
max_time: Optional[PositiveFloat] = Field(
5.0, description="maximum time for optimization in seconds"
)

max_ls: Optional[PositiveInt] = Field(
None,
description="maximum number of line search steps. If None, use scipy defaults.",
)
with_grad: bool = Field(
True,
description="Use autograd (true) or finite difference (false) for gradient computation."
" Use autograd unless it is impossible (e.g. non-differentiable elements).",
)
eps: Optional[PositiveFloat] = Field(
None,
description="Step size used for finite difference if with_grad is False."
" If None, use scipy default of 1e-08.",
)
ftol: Optional[PositiveFloat] = Field(
None,
description="Convergence tolerance on the acquisition function value."
" If None, use scipy default of ftol = 1e7 * numpy.finfo(float).eps = 2.22e-09.",
)
pgtol: Optional[PositiveFloat] = Field(
None,
description="Convergence tolerance on the projected gradient."
" If None, use scipy default of 1e-05.",
)
sequential: bool = Field(
False,
description="Use sequential (true) or joint (false) optimization when q > 1."
" In practice, joint optimization is faster but requires more GPU memory,"
" and sequential optimization is slightly more robust (especially with high q).",
)
model_config = ConfigDict(validate_assignment=True)

@field_validator("n_raw_samples", mode="after")
def validate_n_raw_samples(cls, v, info):
if v is None:
return 128
n_restarts = info.data.get("n_restarts", 20)
if v < n_restarts:
warnings.warn(
f"n_raw_samples should be >= n_restarts, setting to n_restarts={n_restarts}"
)
v = n_restarts
return v
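As a quick illustration of this validator (a sketch with arbitrary values; behavior follows the code above):

from xopt.numerical_optimizer import LBFGSOptimizer

# requesting fewer raw samples than restarts warns and clamps to n_restarts
opt = LBFGSOptimizer(n_restarts=20, n_raw_samples=8)
assert opt.n_raw_samples == 20
# passing None falls back to the default of 128
assert LBFGSOptimizer(n_raw_samples=None).n_raw_samples == 128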

def optimize(self, function, bounds, n_candidates=1, **kwargs):
"""
Optimize the given acquisition function within the specified bounds.
@@ -108,21 +144,32 @@ def optimize(self, function, bounds, n_candidates=1, **kwargs):
if len(bounds) != 2:
raise ValueError("bounds must have the shape [2, ndim]")

# emperical testing showed that the max time is overrun slightly on the botorch side
# empirical testing showed that the max time is overrun slightly on the botorch side
# fix by slightly reducing the max time passed to this function
if self.max_time is not None:
max_time = self.max_time * 0.8 - 0.01
else:
max_time = None

options = {
"maxiter": self.max_iter,
"with_grad": self.with_grad,
}
if self.ftol is not None:
options["ftol"] = self.ftol
if self.pgtol is not None:
options["pgtol"] = self.pgtol
if self.max_ls is not None:
options["max_ls"] = self.max_ls

candidates, _ = optimize_acqf(
acq_function=function,
bounds=bounds,
q=n_candidates,
raw_samples=self.n_restarts,
raw_samples=self.n_raw_samples,
num_restarts=self.n_restarts,
timeout_sec=max_time,
options={"maxiter": self.max_iter},
options=options,
**kwargs,
)
return candidates
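For reference, a minimal usage sketch of the optimizer options added in this file (illustrative values, not recommendations; assumes the package is importable as xopt):

import torch

from xopt.numerical_optimizer import LBFGSOptimizer


def toy_acqf(X):
    # botorch-style acquisition callable: input shape [batch, q, d] -> output shape [batch]
    return torch.amax(X, dim=(1, 2))


optimizer = LBFGSOptimizer(
    n_restarts=10,     # independent initial conditions
    n_raw_samples=64,  # clamped to >= n_restarts by the validator above
    max_iter=500,
    max_time=2.0,      # seconds; reduced slightly before being passed to botorch
    with_grad=True,    # set False (optionally with eps) to use finite differences
    ftol=1e-8,         # forwarded to scipy's L-BFGS-B only when not None
    pgtol=1e-6,
)

bounds = torch.stack((torch.zeros(2), torch.ones(2)))  # shape [2, ndim]
candidates = optimizer.optimize(function=toy_acqf, bounds=bounds, n_candidates=3)
assert candidates.shape == torch.Size([3, 2])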
18 changes: 17 additions & 1 deletion xopt/tests/test_numerical_optimizer.py
@@ -18,7 +18,8 @@ def f(X):
# q_reduction converts it into `sample_shape x batch_shape`
# sample_reduction converts it into `batch_shape`-dim Tensor of acquisition values
# so, final fake tensor needs to have ndim=1
result = torch.amax(X, dim=(1, 2))
assert X.ndim == 3, "X should be a 3D tensor with shape [batch_shape, q, d]"
result = torch.amax(X, dim=(1, 2)) * 2
return result


@@ -29,13 +30,28 @@ def test_base(self):

def test_lbfgs_optimizer(self):
optimizer = LBFGSOptimizer()
optimizer2 = LBFGSOptimizer(with_grad=False, eps=1e-10)
for ndim in [1, 3]:
bounds = torch.stack((torch.zeros(ndim), torch.ones(ndim)))
for ncandidate in [1, 3]:
# np.random.seed(42)
# torch.manual_seed(42)
candidates = optimizer.optimize(
function=f, bounds=bounds, n_candidates=ncandidate
)
# np.random.seed(42)
# torch.manual_seed(42)
candidates2 = optimizer2.optimize(
function=f, bounds=bounds, n_candidates=ncandidate
)
assert candidates.shape == torch.Size([ncandidate, ndim])
assert candidates2.shape == torch.Size([ncandidate, ndim])
f1 = f(candidates.unsqueeze(0))
f2 = f(candidates2.unsqueeze(0))
if ncandidate == 1:
# for multiple candidates the optimizers don't converge to the same point
assert torch.allclose(candidates, candidates2, rtol=0.0, atol=0.05)
assert torch.allclose(f1, f2, rtol=0.0, atol=0.05)

# test max time
max_time_optimizer = LBFGSOptimizer(max_time=1.0)
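For context, the comparison exercised by the updated test, written as a standalone sketch (same toy acquisition function; tolerances mirror the test):

import torch

from xopt.numerical_optimizer import LBFGSOptimizer


def f(X):
    # [batch, q, d] in, [batch] out, as in the test above
    return torch.amax(X, dim=(1, 2)) * 2


bounds = torch.stack((torch.zeros(3), torch.ones(3)))
grad_opt = LBFGSOptimizer()                          # autograd gradients
fd_opt = LBFGSOptimizer(with_grad=False, eps=1e-10)  # finite-difference gradients

x1 = grad_opt.optimize(function=f, bounds=bounds, n_candidates=1)
x2 = fd_opt.optimize(function=f, bounds=bounds, n_candidates=1)

# for a single candidate both gradient modes should land on (nearly) the same optimum
assert torch.allclose(x1, x2, rtol=0.0, atol=0.05)
assert torch.allclose(f(x1.unsqueeze(0)), f(x2.unsqueeze(0)), rtol=0.0, atol=0.05)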