diff --git a/xopt/numerical_optimizer.py b/xopt/numerical_optimizer.py
index 1ad116a69..3bd1fb9a7 100644
--- a/xopt/numerical_optimizer.py
+++ b/xopt/numerical_optimizer.py
@@ -1,10 +1,11 @@
+import warnings
 from abc import ABC, abstractmethod
 from typing import Optional
 
 import torch
 from botorch.acquisition import AcquisitionFunction
 from botorch.optim import optimize_acqf
-from pydantic import ConfigDict, Field, PositiveFloat, PositiveInt
+from pydantic import ConfigDict, Field, PositiveFloat, PositiveInt, field_validator
 from torch import Tensor
 
 from xopt.pydantic import XoptBaseModel
@@ -14,13 +15,6 @@ class NumericalOptimizer(XoptBaseModel, ABC):
     """
     Base class for numerical optimizers.
 
-    Attributes
-    ----------
-    name : str
-        The name of the optimizer. Default is "base_numerical_optimizer".
-    model_config : ConfigDict
-        Configuration dictionary with extra fields forbidden.
-
     Methods
     -------
     optimize(function, bounds, n_candidates=1, **kwargs)
@@ -42,37 +36,38 @@ class LBFGSOptimizer(NumericalOptimizer):
     Attributes
     ----------
     n_restarts : PositiveInt
-        Number of restarts during acquisition function optimization, default is 20.
+        Number of restarts (independent initial conditions) for optimization, default is 20.
+    n_raw_samples : PositiveInt
+        Number of raw samples used to pick the initial `n_restarts` points, default is 128.
     max_iter : PositiveInt
         Maximum number of iterations for the optimizer, default is 2000.
     max_time : Optional[PositiveFloat]
         Maximum time allowed for optimization, default is None (no time limit).
+    max_ls : Optional[PositiveInt]
+        Maximum number of line search steps, default is None (use scipy defaults).
+    with_grad : bool
+        Whether to use autograd (True, default) or finite difference (False) for gradient computation.
+    ftol : Optional[PositiveFloat]
+        Convergence tolerance on the acquisition function value. If None, use scipy defaults.
+    pgtol : Optional[PositiveFloat]
+        Convergence tolerance on the projected gradient. If None, use scipy defaults.
+    sequential : bool
+        Use sequential (True) or joint (False, default) optimization when multiple candidates are requested.
 
     Methods
     -------
     optimize(function, bounds, n_candidates=1, **kwargs)
         Optimize the given acquisition function within the specified bounds.
-
-    Parameters
-    ----------
-    function : callable
-        The acquisition function to be optimized.
-    bounds : Tensor
-        The bounds within which to optimize the acquisition function. Must have shape [2, ndim].
-    n_candidates : int, optional
-        Number of candidates to return, default is 1.
-    **kwargs : dict
-        Additional keyword arguments to pass to the optimizer.
-
-    Returns
-    -------
-    candidates : Tensor
-        The optimized candidates.
     """
 
     name: str = Field("LBFGS", frozen=True)
     n_restarts: PositiveInt = Field(
-        20, description="number of restarts during acquisition function optimization"
+        20,
+        description="number of restarts (independent initial conditions) for optimization",
+    )
+    n_raw_samples: Optional[PositiveInt] = Field(
+        128,
+        description="number of raw samples - `n_restarts` best ones are selected from this set",
     )
     max_iter: PositiveInt = Field(
         2000, description="maximum number of optimization steps"
@@ -80,9 +75,50 @@ class LBFGSOptimizer(NumericalOptimizer):
     max_time: Optional[PositiveFloat] = Field(
         5.0, description="maximum time for optimization in seconds"
     )
-
+    max_ls: Optional[PositiveInt] = Field(
+        None,
+        description="maximum number of line search steps. If None, use scipy defaults.",
+    )
+    with_grad: bool = Field(
+        True,
+        description="Use autograd (true) or finite difference (false) for gradient computation."
+        " Use autograd unless it is impossible (e.g. non-differentiable elements).",
+    )
+    eps: Optional[PositiveFloat] = Field(
+        None,
+        description="Step size used for finite difference if with_grad is False."
+        " If None, use scipy default of 1e-08.",
+    )
+    ftol: Optional[PositiveFloat] = Field(
+        None,
+        description="Convergence tolerance on the acquisition function value."
+        " If None, use scipy default of ftol = 1e7 * numpy.finfo(float).eps = 2.22e-09.",
+    )
+    pgtol: Optional[PositiveFloat] = Field(
+        None,
+        description="Convergence tolerance on the projected gradient."
+        " If None, use scipy default of 1e-05.",
+    )
+    sequential: bool = Field(
+        False,
+        description="Use sequential (true) or joint (false) optimization when q > 1."
+        " In practice, joint optimization is faster but requires more GPU memory,"
+        " and sequential optimization is slightly more robust (especially with high q).",
+    )
     model_config = ConfigDict(validate_assignment=True)
 
+    @field_validator("n_raw_samples", mode="after")
+    def validate_n_raw_samples(cls, v, info):
+        if v is None:
+            return 128
+        n_restarts = info.data.get("n_restarts", 20)
+        if v < n_restarts:
+            warnings.warn(
+                f"n_raw_samples should be >= n_restarts, setting to n_restarts={n_restarts}"
+            )
+            v = n_restarts
+        return v
+
     def optimize(self, function, bounds, n_candidates=1, **kwargs):
         """
         Optimize the given acquisition function within the specified bounds.
@@ -108,21 +144,35 @@ def optimize(self, function, bounds, n_candidates=1, **kwargs):
         if len(bounds) != 2:
             raise ValueError("bounds must have the shape [2, ndim]")
 
-        # emperical testing showed that the max time is overrun slightly on the botorch side
+        # empirical testing showed that the max time is overrun slightly on the botorch side
         # fix by slightly reducing the max time passed to this function
         if self.max_time is not None:
             max_time = self.max_time * 0.8 - 0.01
         else:
             max_time = None
 
+        options = {
+            "maxiter": self.max_iter,
+            "with_grad": self.with_grad,
+        }
+        if self.ftol is not None:
+            options["ftol"] = self.ftol
+        if self.pgtol is not None:
+            options["pgtol"] = self.pgtol
+        if self.max_ls is not None:
+            options["max_ls"] = self.max_ls
+        if self.eps is not None:
+            options["eps"] = self.eps
+
         candidates, _ = optimize_acqf(
             acq_function=function,
             bounds=bounds,
             q=n_candidates,
-            raw_samples=self.n_restarts,
+            raw_samples=self.n_raw_samples,
             num_restarts=self.n_restarts,
             timeout_sec=max_time,
-            options={"maxiter": self.max_iter},
+            options=options,
+            sequential=self.sequential,
             **kwargs,
         )
         return candidates
diff --git a/xopt/tests/test_numerical_optimizer.py b/xopt/tests/test_numerical_optimizer.py
index 9166fda55..9973b419f 100644
--- a/xopt/tests/test_numerical_optimizer.py
+++ b/xopt/tests/test_numerical_optimizer.py
@@ -18,7 +18,8 @@ def f(X):
     # q_reduction converts it into `sample_shape x batch_shape`
     # sample_reduction converts it into `batch_shape`-dim Tensor of acquisition values
     # so, final fake tensor needs to have ndim=1
-    result = torch.amax(X, dim=(1, 2))
+    assert X.ndim == 3, "X should be a 3D tensor with shape [batch_shape, q, d]"
+    result = torch.amax(X, dim=(1, 2)) * 2
     return result
 
 
@@ -29,13 +30,28 @@ def test_base(self):
 
     def test_lbfgs_optimizer(self):
         optimizer = LBFGSOptimizer()
+        optimizer2 = LBFGSOptimizer(with_grad=False, eps=1e-10)
         for ndim in [1, 3]:
             bounds = torch.stack((torch.zeros(ndim), torch.ones(ndim)))
             for ncandidate in [1, 3]:
+                # np.random.seed(42)
+                # torch.manual_seed(42)
                 candidates = optimizer.optimize(
                     function=f, bounds=bounds, n_candidates=ncandidate
                 )
+                # np.random.seed(42)
+                # torch.manual_seed(42)
+                candidates2 = optimizer2.optimize(
+                    function=f, bounds=bounds, n_candidates=ncandidate
+                )
                 assert candidates.shape == torch.Size([ncandidate, ndim])
+                assert candidates2.shape == torch.Size([ncandidate, ndim])
+                f1 = f(candidates.unsqueeze(0))
+                f2 = f(candidates2.unsqueeze(0))
+                if ncandidate == 1:
+                    # for multiple candidates the optimizers don't converge to the same point
+                    assert torch.allclose(candidates, candidates2, rtol=0.0, atol=0.05)
+                    assert torch.allclose(f1, f2, rtol=0.0, atol=0.05)
 
         # test max time
         max_time_optimizer = LBFGSOptimizer(max_time=1.0)
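Not part of the patch: a minimal usage sketch of the new LBFGSOptimizer options, assuming a toy callable (`toy_acq`, mirroring the test helper `f`) stands in for a real BoTorch acquisition function; the field values are illustrative, not defaults.

import torch

from xopt.numerical_optimizer import LBFGSOptimizer


def toy_acq(X):
    # stand-in for an acquisition function; X has shape [batch_shape, q, d]
    return torch.amax(X, dim=(1, 2))


# finite-difference gradients with explicit scipy L-BFGS-B tolerances (illustrative values)
optimizer = LBFGSOptimizer(
    n_restarts=10,
    n_raw_samples=64,
    with_grad=False,
    eps=1e-10,
    ftol=1e-9,
    pgtol=1e-6,
    max_ls=50,
)
bounds = torch.stack((torch.zeros(3), torch.ones(3)))  # shape [2, ndim]
candidates = optimizer.optimize(function=toy_acq, bounds=bounds, n_candidates=2)
assert candidates.shape == torch.Size([2, 3])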