diff --git a/dev_notebooks/save_new_legacy_file.ipynb b/dev_notebooks/save_new_legacy_file.ipynb new file mode 100644 index 00000000..8e849a95 --- /dev/null +++ b/dev_notebooks/save_new_legacy_file.ipynb @@ -0,0 +1,59 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prepare a New \"Legacy Test File\"\n", + "This notebook loads the data from the first legacy test file and saves it using a new version of the library in the latest save file format. This will ensure a record of the file format exists for the purpose of regression tests." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from paretobench import Experiment" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "exp = Experiment.load(\"../tests/legacy_file_formats/paretobench_file_format_v1.0.0.h5\")\n", + "exp.save(\"../tests/legacy_file_formats/paretobench_file_format_v1.1.0.h5\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "paretobench", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/environment.yml b/environment.yml index 6e50cfae..56103134 100644 --- a/environment.yml +++ b/environment.yml @@ -1,3 +1,6 @@ name: paretobench dependencies: - pytest + - pip + - pip: + - pre-commit \ No newline at end of file diff --git a/example_notebooks/container_objects.ipynb b/example_notebooks/container_objects.ipynb index 554f6bea..4f9baa51 100644 --- a/example_notebooks/container_objects.ipynb +++ 
b/example_notebooks/container_objects.ipynb @@ -14,6 +14,7 @@ "metadata": {}, "outputs": [], "source": [ + "import numpy as np\n", "import os\n", "import paretobench as pb\n", "import tempfile" @@ -36,17 +37,22 @@ "name": "stdout", "output_type": "stream", "text": [ - "Population(size=32, vars=10, objs=2, cons=0, fevals=0)\n", + "Population(size=32, vars=10, objs=[--], cons=[], fevals=32)\n", "Decision variables: (32, 10)\n", "Objectives: (32, 2)\n", "Constraints: (32, 0)\n", - "Function evaluations: 0\n" + "Function evaluations: 32\n" ] } ], "source": [ "# Create an example population\n", - "pop = pb.Population.from_random(n_objectives=2, n_decision_vars=10, n_constraints=0, pop_size=32)\n", + "pop = pb.Population(\n", + " x=np.random.random((32, 10)), # 32 individuals worth of 10 decision vars\n", + " f=np.random.random((32, 2)), # 2 objectives\n", + " # Not specifying an array (constraints here) will cause it to populate empty based on other's shape\n", + " fevals=32,\n", + ")\n", "print(pop)\n", "\n", "# Examine some of the parameters\n", @@ -67,12 +73,66 @@ "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Population(size=32, vars=10, objs=[--], cons=[>0.0e+00], fevals=32)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a population with some names\n", + "pop = pb.Population(\n", + " x=np.random.random((32, 10)),\n", + " f=np.random.random((32, 2)),\n", + " g=np.random.random((32, 1)), # Add a single constraint this time\n", + " names_x=[f\"Decision var {idx}\" for idx in range(pop.x.shape[1])],\n", + " names_f=[\"Objective_1\", \"Objective_2\"],\n", + " names_g=[\"Important_Constraint\"],\n", + ")\n", + "pop" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Populations default to representing minimization problems with constraints that are satisfied when each g >= 0. 
This can be modified as in the following code block. Configuring these settings is important when using container methods to find feasible individuals and in calculation of domination." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Population(size=32, vars=10, objs=[+-+], cons=[>3.3e-01,<6.6e-01], fevals=32)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Set naems of the decision variables, objectives, and constraints\n", - "pop.names_x = [f\"Decision var {idx}\" for idx in range(pop.x.shape[1])]\n", - "pop.names_f = [\"Objective 1\", \"Objective 2\"]\n", - "pop.names_g = []" + "# Create a population and specify the objective / constraint directions and target\n", + "pop = pb.Population(\n", + " x=np.random.random((32, 10)),\n", + " f=np.random.random((32, 3)),\n", + " g=np.random.random((32, 2)),\n", + " obj_directions=\"+-+\", # \"+\" indicates maximization, \"-\" indicates minimization\n", + " constraint_directions=\"><\", # \"<\" means satisfied when g<target and \">\" means satisfied when g>target\n", + " constraint_targets=np.array([0.33, 0.66]), # >0.33 constraint and <0.66 constraint\n", + ")\n", + "\n", + "pop" ] }, { @@ -85,23 +145,32 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "History(problem='WFG1 (n=10)', reports=15, vars=10, objs=2, cons=0)\n", - "Problem: WFG1 (n=10)\n", - "Number of reports: 15\n" + "History(problem='WFG1', reports=10, vars=30, objs=[--], cons=[])\n", + "Problem: WFG1\n", + "Number of reports: 10\n" ] } ], "source": [ - "# Create an example history object\n", - "hist = pb.History.from_random(n_populations=15, n_objectives=2, n_decision_vars=10, n_constraints=0, pop_size=32)\n", - "hist.problem = \"WFG1 (n=10)\"\n", + "# Create some population objects which will go into the history. 
We will use the random generation helper function here.\n", + "n_reports = 10\n", + "reports = [\n", + " pb.Population.from_random(n_objectives=2, n_decision_vars=30, n_constraints=0, pop_size=50)\n", + " for _ in range(n_reports)\n", + "]\n", + "\n", + "# Construct the history object\n", + "hist = pb.History(\n", + " reports=reports,\n", + " problem=\"WFG1\", # Use ParetoBench single line format for maximum compatibility with plotting, etc\n", + ")\n", "print(hist)\n", "\n", "# Print some of the properties\n", @@ -119,17 +188,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Experiment(name='NSGA-II (default params)', created='2024-12-10', author='The author of ParetoBench', software='ParetoBench example notebook 1.0.0', runs=32)\n", + "Experiment(name='NSGA-II (default params)', created='2025-01-26', author='The author of ParetoBench', software='ParetoBench example notebook 1.0.0', runs=32)\n", "Number of histories: 32\n", "Name: NSGA-II (default params)\n", - "Creation time: 2024-12-10 05:21:51.993640+00:00\n" + "Creation time: 2025-01-26 06:36:29.299300+00:00\n" ] } ], diff --git a/pyproject.toml b/pyproject.toml index 26f45eed..ad52c50c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "paretobench" -version = "0.3.0" +version = "0.4.0" authors = [ { name="Christopher M. Pierce", email="contact@chris-pierce.com" }, ] diff --git a/src/paretobench/containers.py b/src/paretobench/containers.py index 6c779c1b..db3b4594 100644 --- a/src/paretobench/containers.py +++ b/src/paretobench/containers.py @@ -8,7 +8,7 @@ import re import string -from .utils import get_domination +from .utils import get_domination, binary_str_to_numpy class Population(BaseModel): @@ -16,24 +16,39 @@ class Population(BaseModel): Stores the individuals in a population for one reporting interval in a genetic algorithm. 
Conventional names are used for the decision variables (x), the objectives (f), and inequality constraints (g). The first dimension of each array is the batch dimension. The number of evaluations of the objective functions performed to reach this state is also recorded. - Objectives are assumed to be part of a minimization problem and constraints are set such that individuals with g_i > 0 are - feasible. All arrays must have the same size batch dimension even if they are empty. In this case the non-batch dimension will be zero length. Names may be associated with decision variables, objectives, or constraints in the form of lists. + + Whether each objective is being minimized or maximized is set by the string obj_directions where each character corresponds + to an objective and '+' means maximize with '-' meaning minimize. The constraints are configured by the string of directions + `constraint_directions` and the numpy array of targets `constraint_targets`. The string should contain either the '<' or '>' + character for the constraint at that index to be satisfied when it is less than or greater than the target respectively. 
""" + # The decision vars, objectives, and constraints x: np.ndarray f: np.ndarray g: np.ndarray + + # Total number of function evaluations performed during optimization after this population was completed fevals: int - model_config = ConfigDict(arbitrary_types_allowed=True) # Optional lists of names for decision variables, objectives, and constraints names_x: Optional[List[str]] = None names_f: Optional[List[str]] = None names_g: Optional[List[str]] = None + # Configuration of objectives/constraints (minimization or maximization problem, direction of and target of constraint) + obj_directions: str # '+' means maximize, '-' means minimize + constraint_directions: ( + str # '<' means satisfied when less-than target, '>' means satisfied when greater-than target + ) + constraint_targets: np.ndarray + + # Pydantic config + model_config = ConfigDict(arbitrary_types_allowed=True) + @model_validator(mode="before") @classmethod def set_default_vals(cls, values): @@ -58,6 +73,14 @@ def set_default_vals(cls, values): if values.get("g") is None: values["g"] = np.empty((batch_size, 0), dtype=np.float64) + # Handle objectives / constraints settings (default to canonical problem) + if values.get("obj_directions") is None: + values["obj_directions"] = "-" * values["f"].shape[1] + if values.get("constraint_directions") is None: + values["constraint_directions"] = ">" * values["g"].shape[1] + if values.get("constraint_targets") is None: + values["constraint_targets"] = np.zeros(values["g"].shape[1], dtype=float) + # Set fevals to number of individuals if not included if values.get("fevals") is None: values["fevals"] = batch_size @@ -88,11 +111,52 @@ def validate_names(self): raise ValueError("Length of names_g must match the number of constraints in g.") return self + @model_validator(mode="after") + def validate_obj_directions(self): + if len(self.obj_directions) != self.m: + raise ValueError( + "Length of obj_directions must match number of objectives, got" + f" 
{len(self.obj_directions)} chars but we have {self.m} objectives" + ) + if not all(c in "+-" for c in self.obj_directions): + raise ValueError(f'obj_directions must contain only + or - characters. Got: "{self.obj_directions}"') + return self + + @model_validator(mode="after") + def validate_constraint_directions(self): + if len(self.constraint_directions) != self.n_constraints: + raise ValueError( + "Length of constraint_directions must match number of constraints, got" + f" {len(self.constraint_directions)} chars but we have {self.n_constraints} constraints" + ) + if not all(c in "<>" for c in self.constraint_directions): + raise ValueError( + f'constraint_directions must contain only < or > characters. Got: "{self.constraint_directions}"' + ) + return self + + @model_validator(mode="after") + def validate_constraint_targets(self): + # Check the targets + attr = "constraint_targets" + if not isinstance(getattr(self, attr), np.ndarray): + raise ValueError(f"{attr} must be a numpy array, got: {type(getattr(self, attr))}") + if len(getattr(self, attr).shape) != 1: + raise ValueError(f"{attr} must be 1D, shape was: {getattr(self, attr).shape}") + if len(getattr(self, attr)) != self.g.shape[1]: + raise ValueError( + f"Length of {attr} must match number of constraints in g. Got {len(getattr(self, attr))} " + f"elements and {self.g.shape[1]} constraints from g" + ) + if getattr(self, attr).dtype != np.float64: + raise ValueError(f"{attr} dtype must be {np.float64}. Got {getattr(self, attr).dtype}") + return self + @field_validator("x", "f", "g") @classmethod def validate_numpy_arrays(cls, value: np.ndarray, info) -> np.ndarray: """ - Double checks that the arrays have the right numbe of dimensions and datatype. + Double checks that the arrays have the right number of dimensions and datatype. 
""" if value.dtype != np.float64: raise TypeError(f"Expected array of type { np.float64} for field '{info.field_name}', got {value.dtype}") @@ -119,8 +183,27 @@ def __eq__(self, other): and self.names_x == other.names_x and self.names_f == other.names_f and self.names_g == other.names_g + and self.obj_directions == other.obj_directions + and self.constraint_directions == other.constraint_directions + and np.array_equal(self.constraint_targets, other.constraint_targets) ) + @property + def f_canonical(self): + """ + Return the objectives transformed so that we are a minimization problem. + """ + return binary_str_to_numpy(self.obj_directions, "-", "+")[None, :] * self.f + + @property + def g_canonical(self): + """ + Return constraints transformed such that g[...] >= 0 are the feasible solutions. + """ + gc = binary_str_to_numpy(self.constraint_directions, ">", "<")[None, :] * self.g + gc += binary_str_to_numpy(self.constraint_directions, "<", ">")[None, :] * self.constraint_targets[None, :] + return gc + def __add__(self, other: "Population") -> "Population": """ The sum of two populations is defined here as the population containing all unique individuals from both (set union). 
@@ -129,13 +212,19 @@ def __add__(self, other: "Population") -> "Population": if not isinstance(other, Population): raise TypeError("Operands must be instances of Population") - # Check that the names are consistent + # Check that the names/settings are consistent if self.names_x != other.names_x: raise ValueError("names_x are inconsistent between populations") if self.names_f != other.names_f: raise ValueError("names_f are inconsistent between populations") if self.names_g != other.names_g: raise ValueError("names_g are inconsistent between populations") + if self.obj_directions != other.obj_directions: + raise ValueError("obj_directions are inconsistent between populations") + if self.constraint_directions != other.constraint_directions: + raise ValueError("constraint_directions are inconsistent between populations") + if not np.array_equal(self.constraint_targets, other.constraint_targets): + raise ValueError("constraint_targets are inconsistent between populations") # Concatenate the arrays along the batch dimension (axis=0) new_x = np.concatenate((self.x, other.x), axis=0) @@ -160,6 +249,9 @@ def __add__(self, other: "Population") -> "Population": names_x=self.names_x, names_f=self.names_f, names_g=self.names_g, + obj_directions=self.obj_directions, + constraint_directions=self.constraint_directions, + constraint_targets=self.constraint_targets, ) def __getitem__(self, idx: Union[slice, np.ndarray, List[int]]) -> "Population": @@ -184,18 +276,21 @@ def __getitem__(self, idx: Union[slice, np.ndarray, List[int]]) -> "Population": names_x=self.names_x, names_f=self.names_f, names_g=self.names_g, + obj_directions=self.obj_directions, + constraint_directions=self.constraint_directions, + constraint_targets=self.constraint_targets, ) def get_nondominated_indices(self): """ Returns a boolean array of whether or not an individual is non-dominated. 
""" - return np.sum(get_domination(self.f, self.g), axis=0) == 0 + return np.sum(get_domination(self.f_canonical, self.g_canonical), axis=0) == 0 def get_feasible_indices(self): if self.g.shape[1] == 0: return np.ones((len(self)), dtype=bool) - return np.all(self.g >= 0.0, axis=1) + return np.all(self.g_canonical >= 0.0, axis=1) def get_nondominated_set(self): return self[self.get_nondominated_indices()] @@ -209,6 +304,7 @@ def from_random( pop_size: int, fevals: int = 0, generate_names: bool = False, + generate_obj_constraint_settings: bool = False, ) -> "Population": """ Generate a randomized instance of the Population class. @@ -227,6 +323,8 @@ def from_random( The number of evaluations of the objective functions performed to reach this state, by default 0. generate_names : bool, optional Whether to include names for the decision variables, objectives, and constraints, by default False. + generate_obj_constraint_settings : bool, optional + Randomize the objective and constraint settings, default to minimization problem and g >= 0 constraint Returns ------- @@ -248,12 +346,27 @@ def from_random( """ x = np.random.rand(pop_size, n_decision_vars) f = np.random.rand(pop_size, n_objectives) - g = np.random.rand(pop_size, n_constraints) if n_constraints > 0 else np.empty((pop_size, 0)) + g = np.random.rand(pop_size, n_constraints) - 0.5 if n_constraints > 0 else np.empty((pop_size, 0)) # Optionally generate names if generate_names is True - names_x = [f"x{i+1}" for i in range(n_decision_vars)] if generate_names else None - names_f = [f"f{i+1}" for i in range(n_objectives)] if generate_names else None - names_g = [f"g{i+1}" for i in range(n_constraints)] if generate_names else None + if generate_names: + names_x = [f"x{i+1}" for i in range(n_decision_vars)] + names_f = [f"f{i+1}" for i in range(n_objectives)] + names_g = [f"g{i+1}" for i in range(n_constraints)] + else: + names_x = None + names_f = None + names_g = None + + # Create randomized settings for 
objectives/constraints + if generate_obj_constraint_settings: + obj_directions = "".join(np.random.choice(["+", "-"], size=n_objectives)) + constraint_directions = "".join(np.random.choice([">", "<"], size=n_constraints)) + constraint_targets = np.random.rand(n_constraints) - 0.5 + else: + obj_directions = None + constraint_directions = None + constraint_targets = None return cls( x=x, @@ -263,13 +376,26 @@ def from_random( names_x=names_x, names_f=names_f, names_g=names_g, + obj_directions=obj_directions, + constraint_directions=constraint_directions, + constraint_targets=constraint_targets, ) def __len__(self): return self.x.shape[0] + def _get_constraint_direction_str(self) -> str: + # Create a list of >/< symbols with their targets + return "[" + ",".join(f"{d}{t:.1e}" for d, t in zip(self.constraint_directions, self.constraint_targets)) + "]" + def __repr__(self) -> str: - return f"Population(size={len(self)}, vars={self.x.shape[1]}, objs={self.f.shape[1]}, cons={self.g.shape[1]}, fevals={self.fevals})" + return ( + f"Population(size={len(self)}, " + f"vars={self.x.shape[1]}, " + f"objs=[{self.obj_directions}], " + f"cons={self._get_constraint_direction_str()}, " + f"fevals={self.fevals})" + ) def __str__(self): return self.__repr__() @@ -281,7 +407,7 @@ def count_unique_individuals(self, decimals=13): Parameters ---------- decimals : int, optional - _description_, by default 13 + Number of digits to which we will compare the values, by default 13 Returns ------- @@ -320,7 +446,7 @@ class History(BaseModel): Assumptions: - All reports must have a consistent number of objectives, decision variables, and constraints. 
- - Names, if used, must be consistent across populations + - Objective/constraint settings and names, if used, must be consistent across populations """ reports: List[Population] @@ -356,6 +482,17 @@ def validate_consistent_populations(self): if names_g and len(set(names_g)) != 1: raise ValueError(f"Inconsistent names for constraints in reports: {names_g}") + # Check settings for objectives and constraints + obj_directions = [x.obj_directions for x in self.reports] + constraint_directions = [x.constraint_directions for x in self.reports] + constraint_targets = [tuple(x.constraint_targets) for x in self.reports] + if obj_directions and len(set(obj_directions)) != 1: + raise ValueError(f"Inconsistent obj_directions in reports: {obj_directions}") + if constraint_directions and len(set(constraint_directions)) != 1: + raise ValueError(f"Inconsistent constraint_directions in reports: {constraint_directions}") + if constraint_targets and len(set(constraint_targets)) != 1: + raise ValueError(f"Inconsistent constraint_targets in reports: {constraint_targets}") + return self def __eq__(self, other): @@ -372,6 +509,7 @@ def from_random( n_constraints: int, pop_size: int, generate_names: bool = False, + generate_obj_constraint_settings: bool = False, ) -> "History": """ Generate a randomized instance of the History class, including random problem name and metadata. @@ -390,6 +528,8 @@ def from_random( The number of individuals in each population. generate_names : bool, optional Whether to include names for the decision variables, objectives, and constraints, by default False. 
+ generate_obj_constraint_settings : bool, optional + Randomize the objective and constraint settings, default to minimization problem and g >= 0 constraint Returns ------- @@ -423,6 +563,16 @@ def from_random( for i in range(n_populations) ] + # Create randomized settings for objectives/constraints (must be consistent between objects) + if generate_obj_constraint_settings: + obj_directions = "".join(np.random.choice(["+", "-"], size=n_objectives)) + constraint_directions = "".join(np.random.choice([">", "<"], size=n_constraints)) + constraint_targets = np.random.rand(n_constraints) + for report in reports: + report.obj_directions = obj_directions + report.constraint_directions = constraint_directions + report.constraint_targets = constraint_targets + return cls(reports=reports, problem=problem, metadata=metadata) def _to_h5py_group(self, g: h5py.Group): @@ -463,6 +613,12 @@ def _to_h5py_group(self, g: h5py.Group): if self.reports and self.reports[0].names_g is not None: g["g"].attrs["names"] = self.reports[0].names_g + # Save the configuration data + if self.reports: + g["f"].attrs["directions"] = self.reports[0].obj_directions + g["g"].attrs["directions"] = self.reports[0].constraint_directions + g["g"].attrs["targets"] = self.reports[0].constraint_targets + @classmethod def _from_h5py_group(cls, grp: h5py.Group): """ @@ -484,6 +640,11 @@ def _from_h5py_group(cls, grp: h5py.Group): g = grp["g"][()] # Get the names + obj_directions = grp["f"].attrs.get("directions", None) + constraint_directions = grp["g"].attrs.get("directions", None) + constraint_targets = grp["g"].attrs.get("targets", None) + + # Get the objective / constraint settings names_x = grp["x"].attrs.get("names", None) names_f = grp["f"].attrs.get("names", None) names_g = grp["g"].attrs.get("names", None) @@ -501,6 +662,9 @@ def _from_h5py_group(cls, grp: h5py.Group): names_x=names_x, names_f=names_f, names_g=names_g, + obj_directions=obj_directions, + constraint_directions=constraint_directions, + 
constraint_targets=constraint_targets, ) ) start_idx += pop_size @@ -540,8 +704,8 @@ def pf_reduce(a, b): def __repr__(self) -> str: dims = ( ( - f"vars={self.reports[0].x.shape[1]}, objs={self.reports[0].f.shape[1]}, " - f"cons={self.reports[0].g.shape[1]}" + f"vars={self.reports[0].x.shape[1]}, objs=[{self.reports[0].obj_directions}], " + f"cons={self.reports[0]._get_constraint_direction_str()}" ) if self.reports else "empty" @@ -568,7 +732,7 @@ class Experiment(BaseModel): software_version: str = "" comment: str = "" creation_time: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) - file_version: str = "1.0.0" + file_version: str = "1.1.0" def __eq__(self, other): if not isinstance(other, Experiment): @@ -609,6 +773,7 @@ def from_random( n_constraints: int, pop_size: int, generate_names: bool = False, + generate_obj_constraint_settings: bool = False, ) -> "Experiment": """ Generate a randomized instance of the Experiment class. @@ -629,6 +794,8 @@ def from_random( The number of individuals in each population. generate_names : bool, optional Whether to include names for the decision variables, objectives, and constraints, by default False. 
+ generate_obj_constraint_settings : bool, optional + Randomize the objective and constraint settings, default to minimization problem and g >= 0 constraint Returns ------- @@ -644,6 +811,7 @@ def from_random( n_constraints, pop_size, generate_names=generate_names, + generate_obj_constraint_settings=generate_obj_constraint_settings, ) for _ in range(n_histories) ] diff --git a/src/paretobench/plotting/attainment.py b/src/paretobench/plotting/attainment.py index 22ded9a5..3dac9b60 100644 --- a/src/paretobench/plotting/attainment.py +++ b/src/paretobench/plotting/attainment.py @@ -1,7 +1,7 @@ import numpy as np from ..containers import Population -from ..utils import get_nondominated_inds +from ..utils import get_nondominated_inds, binary_str_to_numpy def get_reference_point(population: Population, padding=0.1): @@ -20,10 +20,10 @@ def get_reference_point(population: Population, padding=0.1): ndarray Reference point coordinates calculated as max values plus padding. """ - min_vals = np.min(population.f, axis=0) - max_vals = np.max(population.f, axis=0) + min_vals = np.min(population.f_canonical, axis=0) + max_vals = np.max(population.f_canonical, axis=0) ref_point = max_vals + (max_vals - min_vals) * padding - return ref_point + return ref_point * binary_str_to_numpy(population.obj_directions, "-", "+") def compute_attainment_surface_2d(population: Population, ref_point=None, padding=0.1): @@ -58,19 +58,21 @@ def compute_attainment_surface_2d(population: Population, ref_point=None, paddin # Handle missing ref-point if ref_point is None: ref_point = get_reference_point(population, padding=padding) - ref_point = np.asarray(ref_point) + ref_point = np.asarray(ref_point) * binary_str_to_numpy(population.obj_directions, "-", "+") # Get only nondominated points population = population.get_nondominated_set() - if not np.all(ref_point >= np.max(population.f, axis=0)): + # Check reference point is reasonable + max_canonical = np.max(population.f_canonical, axis=0) + if not 
np.all(ref_point >= max_canonical): raise ValueError( - f"Reference point coordinates must exceed all points in non-dominated set " - f"(ref_point={ref_point}, max_pf=({np.max(population.f[:, 0])}, {np.max(population.f[:, 1])}))" + f"Reference point must be dominated by all points in non-dominated set " + f"(ref_point={ref_point}, max_non_dominated_canonical=({max_canonical[0]}, {max_canonical[1]}))" ) - # Sort points by x coordinate (first objective) - sorted_points = population.f[np.argsort(population.f[:, 0])] + # Sort points by x coordinate (first one) with canonical objectives + sorted_points = population.f_canonical[np.argsort(population.f_canonical[:, 0])] # Initialize the surface points list with the first point surface = [] @@ -86,7 +88,9 @@ def compute_attainment_surface_2d(population: Population, ref_point=None, paddin # Add the next point surface.append(next_point) surface = np.array(surface) - return np.concatenate( + + # Add "arms" going out to reference point boundary to surface + surface = np.concatenate( ( [[surface[0, 0], ref_point[1]]], surface, @@ -95,6 +99,9 @@ def compute_attainment_surface_2d(population: Population, ref_point=None, paddin axis=0, ) + # Do inverse transformation from canonical objectives to actual objectives + return surface * binary_str_to_numpy(population.obj_directions, "-", "+")[None, :] + def save_mesh_to_stl(vertices: np.ndarray, triangles: np.ndarray, filename: str): """ @@ -294,22 +301,21 @@ def compute_attainment_surface_3d(population: Population, ref_point=None, paddin # If no reference point provided, compute one if ref_point is None: ref_point = get_reference_point(population, padding=padding) - ref_point = np.asarray(ref_point) - - if not np.all(ref_point >= np.max(population.f, axis=0)): - raise ValueError("Reference point must dominate all points") + ref_point = np.asarray(ref_point) * binary_str_to_numpy(population.obj_directions, "-", "+") # Get the nondominated points population = 
population.get_nondominated_set() + if not np.all(ref_point >= np.max(population.f_canonical, axis=0)): + raise ValueError("Reference point must be dominated by all non-dominated points in set") vertices = [] triangles = [] vertex_dict = {} # Sort points in each dimension - sorted_by_z = population.f[np.argsort(population.f[:, 2])] # For XY plane - sorted_by_x = population.f[np.argsort(population.f[:, 0])] # For YZ plane - sorted_by_y = population.f[np.argsort(population.f[:, 1])] # For XZ plane + sorted_by_z = population.f_canonical[np.argsort(population.f_canonical[:, 2])] # For XY plane + sorted_by_x = population.f_canonical[np.argsort(population.f_canonical[:, 0])] # For YZ plane + sorted_by_y = population.f_canonical[np.argsort(population.f_canonical[:, 1])] # For XZ plane # Process XY plane (sorted by Z) triangles.extend(mesh_plane(sorted_by_z, 2, 0, 1, ref_point, vertex_dict, vertices)[1]) @@ -320,4 +326,8 @@ def compute_attainment_surface_3d(population: Population, ref_point=None, paddin # Process XZ plane (sorted by Y) triangles.extend(mesh_plane(sorted_by_y, 1, 0, 2, ref_point, vertex_dict, vertices)[1]) - return np.array(vertices), np.array(triangles) + # Convert to numpy arrays and correct for canonicalized objectives + vertices = np.array(vertices) * binary_str_to_numpy(population.obj_directions, "-", "+")[None, :] + triangles = np.array(triangles) + + return vertices, triangles diff --git a/src/paretobench/plotting/history.py b/src/paretobench/plotting/history.py index 1ca7f477..dd9746ed 100644 --- a/src/paretobench/plotting/history.py +++ b/src/paretobench/plotting/history.py @@ -451,7 +451,7 @@ def history_obj_animation( if len(scale.shape) != 1 or scale.shape[0] != history.reports[0].m: raise ValueError( f"Length of scale must match number of objectives. Got scale factors with shape {scale.shape}" - f" and {history.reports[0].f.shape[1]} objectives." + f" and {history.reports[0].m} objectives." 
) else: scale = np.ones(history.reports[0].m) diff --git a/src/paretobench/plotting/population.py b/src/paretobench/plotting/population.py index 46685109..fbd8ccd1 100644 --- a/src/paretobench/plotting/population.py +++ b/src/paretobench/plotting/population.py @@ -102,7 +102,7 @@ def population_obj_scatter( if len(scale.shape) != 1 or scale.shape[0] != population.m: raise ValueError( f"Length of scale must match number of objectives. Got scale factors with shape {scale.shape}" - f" and {population.f.shape[1]} objectives." + f" and {population.m} objectives." ) else: scale = np.ones(population.m) @@ -119,10 +119,10 @@ def population_obj_scatter( pf = None if problem is not None: problem = get_problem_from_obj_or_str(problem) - if problem.m != population.f.shape[1]: + if problem.m != population.m: raise ValueError( f'Number of objectives in problem must match number in population. Got {problem.m} objectives from problem "{problem}" ' - f"and {population.f.shape[1]} from the population." + f"and {population.m} from the population." ) if isinstance(problem, ProblemWithPF): pf = problem.get_pareto_front(n_pf) @@ -134,10 +134,10 @@ def population_obj_scatter( pf = np.asarray(pf_objectives) if pf.ndim != 2: raise ValueError("pf_objectives must be a 2D array") - if pf.shape[1] != population.f.shape[1]: + if pf.shape[1] != population.m: raise ValueError( f"Number of objectives in pf_objectives must match number in population. 
Got {pf.shape[1]} in pf_objectives " - f"and {population.f.shape[1]} in population" + f"and {population.m} in population" ) # Get the point settings for this plot @@ -157,7 +157,7 @@ def population_obj_scatter( # For 2D problems add_legend = False base_color = color - if population.f.shape[1] == 2: + if population.m == 2: # Make axis if not supplied if ax is None: ax = fig.add_subplot(111) @@ -225,7 +225,7 @@ def population_obj_scatter( ax.set_ylabel(labels[obj_idx[1]]) # For 3D problems - elif population.f.shape[1] == 3: + elif population.m == 3: # Get an axis if not supplied if ax is None: ax = fig.add_subplot(111, projection="3d") @@ -283,7 +283,7 @@ def population_obj_scatter( # We can't plot in 4D :( else: - raise ValueError(f"Plotting supports only 2D and 3D objectives currently: n_objs={population.f.shape[1]}") + raise ValueError(f"Plotting supports only 2D and 3D objectives currently: n_objs={population.m}") if add_legend: plt.legend(loc=legend_loc) diff --git a/src/paretobench/plotting/utils.py b/src/paretobench/plotting/utils.py index bf5bd935..85902da8 100644 --- a/src/paretobench/plotting/utils.py +++ b/src/paretobench/plotting/utils.py @@ -80,7 +80,8 @@ def get_per_point_settings_population( # Get the domination ranks (of only the visible solutions so we don't end up with a plot of all invisible points) ranks = np.zeros(len(population)) filtered_indices = np.where(plot_filt)[0] - for rank, idx in enumerate(fast_dominated_argsort(population.f[plot_filt, :], population.g[plot_filt, :])): + idxs = fast_dominated_argsort(population.f_canonical[plot_filt, :], population.g_canonical[plot_filt, :]) + for rank, idx in enumerate(idxs): ranks[filtered_indices[idx]] = rank # Compute alpha from the ranks diff --git a/src/paretobench/utils.py b/src/paretobench/utils.py index ce7ca0c9..20bec785 100644 --- a/src/paretobench/utils.py +++ b/src/paretobench/utils.py @@ -180,3 +180,12 @@ def get_problem_from_obj_or_str(obj_or_str: Union[str, Problem]) -> Problem: return 
Problem.from_line_fmt(obj_or_str) else: raise ValueError(f"Unrecognized input type: {type(obj_or_str)}") + + +def binary_str_to_numpy(ss, pos_char, neg_char): + """ + Convert the characters of the string ss into a numpy array with +1 being wherever + the character pos_char shows up and -1 being wherever neg_char shows up. + """ + arr = np.array(list(ss)) + return np.where(arr == pos_char, 1, np.where(arr == neg_char, -1, 0)) diff --git a/tests/legacy_file_formats/paretobench_file_format_v1.0.0.h5 b/tests/legacy_file_formats/paretobench_file_format_v1.0.0.h5 new file mode 100644 index 00000000..04fd3ff8 Binary files /dev/null and b/tests/legacy_file_formats/paretobench_file_format_v1.0.0.h5 differ diff --git a/tests/legacy_file_formats/paretobench_file_format_v1.1.0.h5 b/tests/legacy_file_formats/paretobench_file_format_v1.1.0.h5 new file mode 100644 index 00000000..831337a0 Binary files /dev/null and b/tests/legacy_file_formats/paretobench_file_format_v1.1.0.h5 differ diff --git a/tests/test_containers.py b/tests/test_containers.py index 63d6c6a5..43c7b615 100644 --- a/tests/test_containers.py +++ b/tests/test_containers.py @@ -3,10 +3,38 @@ import os import pytest import tempfile +from pathlib import Path +from paretobench.analyze_metrics import normalize_problem_name from paretobench.containers import Experiment, Population, History +def get_test_files(): + test_dir = Path(__file__).parent / "legacy_file_formats" + return [f for f in test_dir.glob("*.h5")] + + +@pytest.mark.parametrize("test_file", get_test_files()) +def test_load_legacy_files(test_file): + """ + Test loading different versions of saved experiment files for backwards compatibility. 
+ """ + exp = Experiment.load(test_file) + + # Some basic checks + assert len(exp.runs) == 6 + for run in exp.runs: + assert len(run) == 8 + assert len(run.reports[0]) == 50 + assert run.reports[0].m == 2 + assert run.reports[0].n == 5 + assert run.reports[0].g.shape[1] == 0 + + # Check problems are right + probs = set(normalize_problem_name(x.problem) for x in exp.runs) + assert probs == {"ZDT2 (n=5)", "ZDT4 (n=5)", "ZDT6 (n=5)"} + + @pytest.mark.parametrize("generate_names", [False, True]) def test_experiment_save_load(generate_names): """ @@ -21,6 +49,7 @@ def test_experiment_save_load(generate_names): n_constraints=2, pop_size=50, generate_names=generate_names, + generate_obj_constraint_settings=True, ) # Use a temporary directory to save the file @@ -327,3 +356,66 @@ def test_count_unique_individuals(): g=np.array([[3.0], [4.0], [3.0]]), ) assert pop_empty_dims.count_unique_individuals() == 2 + + +def test_get_feasible_indices(): + # Single less-than constraint (g <= 1) + pop1 = Population( + x=np.empty((3, 0)), + f=np.empty((3, 0)), + g=np.array([[0.5], [1.5], [1.0]]), + constraint_directions="<", + constraint_targets=np.array([1.0]), + ) + assert np.array_equal(pop1.get_feasible_indices(), np.array([True, False, True])) + + # Single greater-than constraint (g >= 1) + pop2 = Population( + x=np.empty((3, 0)), + f=np.empty((3, 0)), + g=np.array([[0.5], [1.5], [1.0]]), + constraint_directions=">", + constraint_targets=np.array([1.0]), + ) + assert np.array_equal(pop2.get_feasible_indices(), np.array([False, True, True])) + + # Single less-than constraint (g <= -1) + pop1 = Population( + x=np.empty((3, 0)), + f=np.empty((3, 0)), + g=np.array([[-0.5], [-1.5], [-1.0]]), + constraint_directions="<", + constraint_targets=np.array([-1.0]), + ) + assert np.array_equal(pop1.get_feasible_indices(), np.array([False, True, True])) + + # Single greater-than constraint (g >= -1) + pop2 = Population( + x=np.empty((3, 0)), + f=np.empty((3, 0)), + g=np.array([[-0.5], [-1.5], 
[-1.0]]), + constraint_directions=">", + constraint_targets=np.array([-1.0]), + ) + assert np.array_equal(pop2.get_feasible_indices(), np.array([True, False, True])) + + # Multiple mixed constraints (g1 <= 0, g2 >= 1) + pop3 = Population( + x=np.empty((4, 0)), + f=np.empty((4, 0)), + g=np.array( + [ + [-0.5, 1.5], + [0.5, 0.5], + [-1.0, 1.0], + [0.1, 2.0], + ] + ), + constraint_directions="<>", + constraint_targets=np.array([0.0, 1.0]), + ) + assert np.array_equal(pop3.get_feasible_indices(), np.array([True, False, True, False])) + + # No constraints + pop4 = Population(x=np.empty((3, 0)), f=np.empty((3, 0)), g=np.empty((3, 0))) + assert np.array_equal(pop4.get_feasible_indices(), np.array([True, True, True]))