⬆️ Update to gymnasium

MathisFederico · MathisFederico · commit d8707dc4c163 · 2025-01-12T11:52:36.000+01:00
Refactor to centralize gym tests
diff --git a/pyproject.toml b/pyproject.toml
@@ -23,7 +23,9 @@ keywords = [
 
 
 [project.optional-dependencies]
-gym = ["gym >= 0.26"]
+gym = [
+    "gymnasium>=1.0.0",
+]
 gui = ["pygame >= 2.1.0", "pygame-menu >= 4.3.8"]
 planning = ["unified_planning[aries,enhsp] >= 1.1.0", "up-enhsp>=0.0.25"]
 htmlvis = ["pyvis<=0.3.1"]
diff --git a/src/hcraft/env.py b/src/hcraft/env.py
@@ -271,7 +271,7 @@
 """
 
 import collections
-from typing import TYPE_CHECKING, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
 
 import numpy as np
 
@@ -293,7 +293,7 @@
 
 # Gym is an optional dependency.
 try:
-    import gym
+    import gymnasium as gym
 
     DiscreteSpace = gym.spaces.Discrete
     BoxSpace = gym.spaces.Box
@@ -398,7 +398,9 @@ def action_masks(self) -> np.ndarray:
         """Return boolean mask of valid actions."""
         return np.array([t.is_valid(self.state) for t in self.world.transformations])
 
-    def step(self, action: int):
+    def step(
+        self, action: Union[int, str, np.ndarray]
+    ) -> Tuple[np.ndarray, float, bool, bool, dict]:
         """Perform one step in the environment given the index of a wanted transformation.
 
         If the selected transformation can be performed, the state is updated and
@@ -407,6 +409,13 @@ def step(self, action: int):
 
         """
 
+        if isinstance(action, np.ndarray):
+            if not action.size == 1:
+                raise TypeError(
+                    "Actions should be integers corresponding the a transformation index"
+                    f", got array with multiple elements:\n{action}."
+                )
+            action = action.flatten()[0]
         try:
             action = int(action)
         except (TypeError, ValueError) as e:
@@ -433,7 +442,13 @@ def step(self, action: int):
 
         self.current_score += reward
         self.cumulated_score += reward
-        return self._step_output(reward, terminated, truncated)
+        return (
+            self.state.observation,
+            reward,
+            terminated,
+            truncated,
+            self.infos(),
+        )
 
     def render(self, mode: Optional[str] = None, **_kwargs) -> Union[str, np.ndarray]:
         """Render the observation of the agent in a format depending on `render_mode`."""
@@ -451,7 +466,7 @@ def reset(
         *,
         seed: Optional[int] = None,
         options: Optional[dict] = None,
-    ) -> np.ndarray:
+    ) -> Tuple[np.ndarray, dict]:
         """Resets the state of the environement.
 
         Returns:
@@ -472,7 +487,7 @@ def reset(
 
         self.state.reset()
         self.purpose.reset()
-        return self.state.observation
+        return self.state.observation, self.infos()
 
     def close(self):
         """Closes the environment."""
@@ -540,19 +555,14 @@ def planning_problem(self, **kwargs) -> HcraftPlanningProblem:
         """
         return HcraftPlanningProblem(self.state, self.name, self.purpose, **kwargs)
 
-    def _step_output(self, reward: float, terminated: bool, truncated: bool):
+    def infos(self) -> dict:
         infos = {
             "action_is_legal": self.action_masks(),
             "score": self.current_score,
             "score_average": self.cumulated_score / self.episodes,
         }
         infos.update(self._tasks_infos())
-        return (
-            self.state.observation,
-            reward,
-            terminated or truncated,
-            infos,
-        )
+        return infos
 
     def _tasks_infos(self):
         infos = {}
diff --git a/src/hcraft/examples/light_recursive.py b/src/hcraft/examples/light_recursive.py
@@ -29,7 +29,7 @@
 
 # gym is an optional dependency
 try:
-    import gym
+    import gymnasium as gym
 
     gym.register(
         id="LightRecursiveHcraft-v1",
@@ -41,7 +41,6 @@
 
 
 class LightRecursiveHcraftEnv(HcraftEnv):
-
     """LightRecursive environment."""
 
     def __init__(self, n_items: int = 6, n_required_previous: int = 2, **kwargs):
diff --git a/src/hcraft/examples/minecraft/__init__.py b/src/hcraft/examples/minecraft/__init__.py
@@ -7,6 +7,7 @@
 .. include:: ../../../../docs/images/requirements_graphs/MineHcraft.html
 </div>
 """
+
 from typing import Optional
 
 import hcraft.examples.minecraft.items as items
@@ -21,7 +22,7 @@
 
 # gym is an optional dependency
 try:
-    import gym
+    import gymnasium as gym
 
     ENV_PATH = "hcraft.examples.minecraft.env:MineHcraftEnv"
 
diff --git a/src/hcraft/examples/minicraft/__init__.py b/src/hcraft/examples/minicraft/__init__.py
@@ -1,6 +1,6 @@
 """# MiniHCraft environments
 
-List of environments representing abstractions from 
+List of environments representing abstractions from
 [minigrid environments](https://minigrid.farama.org/environments/minigrid/).
 
 See submodules for each individual environement:
@@ -77,7 +77,7 @@
 MINICRAFT_GYM_ENVS = []
 
 try:
-    import gym
+    import gymnasium as gym
 
     ENV_PATH = "hcraft.examples.minicraft"
 
diff --git a/src/hcraft/examples/random_simple/__init__.py b/src/hcraft/examples/random_simple/__init__.py
@@ -4,7 +4,7 @@
 
 # gym is an optional dependency
 try:
-    import gym
+    import gymnasium as gym
 
     gym.register(
         id="RandomHcraft-v1",
diff --git a/src/hcraft/examples/recursive.py b/src/hcraft/examples/recursive.py
@@ -1,4 +1,4 @@
-""" # Recursive HierarchyCraft Environments
+"""# Recursive HierarchyCraft Environments
 
 The goal of the environment is to get the last item.
 But each item requires all the previous items,
@@ -29,7 +29,7 @@
 
 # gym is an optional dependency
 try:
-    import gym
+    import gymnasium as gym
 
     gym.register(
         id="RecursiveHcraft-v1",
@@ -41,7 +41,6 @@
 
 
 class RecursiveHcraftEnv(HcraftEnv):
-
     """RecursiveHcraft Environment"""
 
     def __init__(self, n_items: int = 6, **kwargs):
diff --git a/src/hcraft/examples/tower.py b/src/hcraft/examples/tower.py
@@ -41,7 +41,7 @@
 from hcraft.task import GetItemTask
 
 try:
-    import gym
+    import gymnasium as gym
 
     gym.register(
         id="TowerHcraft-v1",
@@ -53,7 +53,6 @@
 
 
 class TowerHcraftEnv(HcraftEnv):
-
     """Tower, a tower-structured hierarchical Environment.
 
     Item of given layer requires all items of the previous.
diff --git a/src/hcraft/examples/treasure/__init__.py b/src/hcraft/examples/treasure/__init__.py
@@ -14,7 +14,7 @@
 
 # gym is an optional dependency
 try:
-    import gym
+    import gymnasium as gym
 
     gym.register(
         id="Treasure-v1",
diff --git a/tests/examples/minecraft/test_gym_make.py b/tests/examples/minecraft/test_gym_make.py
diff --git a/tests/examples/test_gym_make.py b/tests/examples/test_gym_make.py
diff --git a/tests/examples/test_random.py b/tests/examples/test_random.py
@@ -1,3 +1,4 @@
+import gymnasium
 import pytest
 import pytest_check as check
 
@@ -6,7 +7,6 @@
 
 
 class TestRandomHcraft:
-
     """Test the RandomHcraft environment"""
 
     @pytest.fixture(autouse=True)
@@ -15,16 +15,6 @@ def setup_method(self):
         self.n_items_per_n_inputs = {0: 1, 1: 5, 2: 10, 4: 1}
         self.n_items = sum(self.n_items_per_n_inputs.values())
 
-    def test_gym_make(self):
-        gym = pytest.importorskip("gym")
-        env: RandomHcraftEnv = gym.make(
-            "RandomHcraft-v1",
-            n_items_per_n_inputs=self.n_items_per_n_inputs,
-            seed=42,
-        )
-        check.equal(len(env.world.items), self.n_items)
-        check.equal(env.seed, 42)
-
     def test_same_seed_same_requirements_graph(self):
         env = RandomHcraftEnv(self.n_items_per_n_inputs, seed=42)
         env2 = RandomHcraftEnv(self.n_items_per_n_inputs, seed=42)
diff --git a/tests/examples/test_recursive.py b/tests/examples/test_recursive.py
diff --git a/tests/examples/test_tower.py b/tests/examples/test_tower.py
diff --git a/tests/test_gym.py b/tests/test_gym.py