From 1c25053481f4b5df803b8c500802aa4ba51525ca Mon Sep 17 00:00:00 2001 From: deathcoder Date: Sat, 14 Sep 2024 17:45:23 +0200 Subject: [PATCH 1/3] Fix tests --- stable_baselines3/common/buffers.py | 2 ++ stable_baselines3/common/envs/bit_flipping_env.py | 4 ++-- stable_baselines3/common/utils.py | 3 +++ stable_baselines3/common/vec_env/vec_normalize.py | 2 +- tests/test_spaces.py | 3 +++ 5 files changed, 11 insertions(+), 3 deletions(-) diff --git a/stable_baselines3/common/buffers.py b/stable_baselines3/common/buffers.py index 306b43571..70198d7fc 100644 --- a/stable_baselines3/common/buffers.py +++ b/stable_baselines3/common/buffers.py @@ -135,6 +135,8 @@ def to_torch(self, array: np.ndarray, copy: bool = True) -> th.Tensor: :return: """ if copy: + if hasattr(th, "backends") and th.backends.mps.is_built(): + return th.tensor(array, dtype=th.float32, device=self.device) return th.tensor(array, device=self.device) return th.as_tensor(array, device=self.device) diff --git a/stable_baselines3/common/envs/bit_flipping_env.py b/stable_baselines3/common/envs/bit_flipping_env.py index 3ea0c7bb0..e762d95ff 100644 --- a/stable_baselines3/common/envs/bit_flipping_env.py +++ b/stable_baselines3/common/envs/bit_flipping_env.py @@ -78,11 +78,11 @@ def convert_if_needed(self, state: np.ndarray) -> Union[int, np.ndarray]: if self.discrete_obs_space: # The internal state is the binary representation of the # observed one - return int(sum(state[i] * 2**i for i in range(len(state)))) + return int(sum(int(state[i]) * 2**i for i in range(len(state)))) if self.image_obs_space: size = np.prod(self.image_shape) - image = np.concatenate((state * 255, np.zeros(size - len(state), dtype=np.uint8))) + image = np.concatenate((state.astype(np.uint8) * 255, np.zeros(size - len(state), dtype=np.uint8))) return image.reshape(self.image_shape).astype(np.uint8) return state diff --git a/stable_baselines3/common/utils.py b/stable_baselines3/common/utils.py index ff83e0e4c..9e91dac47 100644 --- 
a/stable_baselines3/common/utils.py +++ b/stable_baselines3/common/utils.py @@ -483,6 +483,8 @@ def obs_as_tensor(obs: Union[np.ndarray, Dict[str, np.ndarray]], device: th.devi if isinstance(obs, np.ndarray): return th.as_tensor(obs, device=device) elif isinstance(obs, dict): + if hasattr(th, "backends") and th.backends.mps.is_built(): + return {key: th.as_tensor(_obs, dtype=th.float32, device=device) for (key, _obs) in obs.items()} return {key: th.as_tensor(_obs, device=device) for (key, _obs) in obs.items()} else: raise Exception(f"Unrecognized type of observation {type(obs)}") @@ -523,6 +525,7 @@ def get_available_accelerator() -> str: """ if hasattr(th, "backends") and th.backends.mps.is_built(): # MacOS Metal GPU + th.set_default_dtype(th.float32) return "mps" elif th.cuda.is_available(): return "cuda" diff --git a/stable_baselines3/common/vec_env/vec_normalize.py b/stable_baselines3/common/vec_env/vec_normalize.py index 391ce342d..cda2370aa 100644 --- a/stable_baselines3/common/vec_env/vec_normalize.py +++ b/stable_baselines3/common/vec_env/vec_normalize.py @@ -254,7 +254,7 @@ def normalize_reward(self, reward: np.ndarray) -> np.ndarray: """ if self.norm_reward: reward = np.clip(reward / np.sqrt(self.ret_rms.var + self.epsilon), -self.clip_reward, self.clip_reward) - return reward + return reward.astype(np.float32) def unnormalize_obs(self, obs: Union[np.ndarray, Dict[str, np.ndarray]]) -> Union[np.ndarray, Dict[str, np.ndarray]]: # Avoid modifying by reference the original object diff --git a/tests/test_spaces.py b/tests/test_spaces.py index e006c1f96..8e5898f2b 100644 --- a/tests/test_spaces.py +++ b/tests/test_spaces.py @@ -4,6 +4,7 @@ import gymnasium as gym import numpy as np import pytest +import torch as th from gymnasium import spaces from gymnasium.spaces.space import Space @@ -151,6 +152,8 @@ def test_discrete_obs_space(model_class, env): ], ) def test_float64_action_space(model_class, obs_space, action_space): + if hasattr(th, "backends") and 
th.backends.mps.is_built(): + pytest.skip("MPS framework doesn't support float64") env = DummyEnv(obs_space, action_space) env = gym.wrappers.TimeLimit(env, max_episode_steps=200) if isinstance(env.observation_space, spaces.Dict): From f822ef53fcfaed0837f9f218d9b77a4407869ac0 Mon Sep 17 00:00:00 2001 From: deathcoder Date: Tue, 17 Sep 2024 17:40:48 +0200 Subject: [PATCH 2/3] Attempt fix ci: only cast reward from float64 to float32 --- .../common/vec_env/vec_normalize.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/stable_baselines3/common/vec_env/vec_normalize.py b/stable_baselines3/common/vec_env/vec_normalize.py index cda2370aa..ca0a98012 100644 --- a/stable_baselines3/common/vec_env/vec_normalize.py +++ b/stable_baselines3/common/vec_env/vec_normalize.py @@ -125,6 +125,20 @@ def _sanity_checks(self) -> None: f"not {self.observation_space}" ) + @staticmethod + def _maybe_cast_reward(reward: np.ndarray) -> np.ndarray: + """ + Cast `np.float64` reward datatype to `np.float32`, + keep the other dtypes unchanged. + + :param reward: The reward array to cast + :return: The reward cast to ``np.float32`` if its dtype was float64, + the unchanged reward otherwise. + """ + if reward.dtype == np.float64: + return reward.astype(np.float32) + return reward + def __getstate__(self) -> Dict[str, Any]: """ Gets state for pickling. 
@@ -254,7 +268,8 @@ def normalize_reward(self, reward: np.ndarray) -> np.ndarray: """ if self.norm_reward: reward = np.clip(reward / np.sqrt(self.ret_rms.var + self.epsilon), -self.clip_reward, self.clip_reward) - return reward.astype(np.float32) + + return self._maybe_cast_reward(reward) def unnormalize_obs(self, obs: Union[np.ndarray, Dict[str, np.ndarray]]) -> Union[np.ndarray, Dict[str, np.ndarray]]: # Avoid modifying by reference the original object From 1ac4a60d52dca76d410f111b445f49c9d2c2a231 Mon Sep 17 00:00:00 2001 From: deathcoder Date: Tue, 17 Sep 2024 18:01:39 +0200 Subject: [PATCH 3/3] allow running workflows from ui --- .github/workflows/ci.yml | 72 ++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b1078cd28..113b9d7bd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,10 +5,10 @@ name: CI on: push: - branches: [ master ] + branches: [master] pull_request: - branches: [ master ] - + branches: [master] + workflow_dispatch: jobs: build: env: @@ -23,38 +23,38 @@ jobs: python-version: ["3.8", "3.9", "3.10", "3.11"] steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - # cpu version of pytorch - pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + # cpu version of pytorch + pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu - # Install Atari Roms - pip install autorom - wget 
https://gist.githubusercontent.com/jjshoots/61b22aefce4456920ba99f2c36906eda/raw/00046ac3403768bfe45857610a3d333b8e35e026/Roms.tar.gz.b64 - base64 Roms.tar.gz.b64 --decode &> Roms.tar.gz - AutoROM --accept-license --source-file Roms.tar.gz + # Install Atari Roms + pip install autorom + wget https://gist.githubusercontent.com/jjshoots/61b22aefce4456920ba99f2c36906eda/raw/00046ac3403768bfe45857610a3d333b8e35e026/Roms.tar.gz.b64 + base64 Roms.tar.gz.b64 --decode &> Roms.tar.gz + AutoROM --accept-license --source-file Roms.tar.gz - pip install .[extra_no_roms,tests,docs] - # Use headless version - pip install opencv-python-headless - - name: Lint with ruff - run: | - make lint - - name: Build the doc - run: | - make doc - - name: Check codestyle - run: | - make check-codestyle - - name: Type check - run: | - make type - - name: Test with pytest - run: | - make pytest + pip install .[extra_no_roms,tests,docs] + # Use headless version + pip install opencv-python-headless + - name: Lint with ruff + run: | + make lint + - name: Build the doc + run: | + make doc + - name: Check codestyle + run: | + make check-codestyle + - name: Type check + run: | + make type + - name: Test with pytest + run: | + make pytest