From 1c25053481f4b5df803b8c500802aa4ba51525ca Mon Sep 17 00:00:00 2001
From: deathcoder <night-devil@hotmail.it>
Date: Sat, 14 Sep 2024 17:45:23 +0200
Subject: [PATCH 1/3] Fix tests

---
 stable_baselines3/common/buffers.py               | 2 ++
 stable_baselines3/common/envs/bit_flipping_env.py | 4 ++--
 stable_baselines3/common/utils.py                 | 3 +++
 stable_baselines3/common/vec_env/vec_normalize.py | 2 +-
 tests/test_spaces.py                              | 3 +++
 5 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/stable_baselines3/common/buffers.py b/stable_baselines3/common/buffers.py
index 306b43571..70198d7fc 100644
--- a/stable_baselines3/common/buffers.py
+++ b/stable_baselines3/common/buffers.py
@@ -135,6 +135,8 @@ def to_torch(self, array: np.ndarray, copy: bool = True) -> th.Tensor:
         :return:
         """
         if copy:
+            if hasattr(th, "backends") and th.backends.mps.is_built():
+                return th.tensor(array, dtype=th.float32, device=self.device)
             return th.tensor(array, device=self.device)
         return th.as_tensor(array, device=self.device)
 
diff --git a/stable_baselines3/common/envs/bit_flipping_env.py b/stable_baselines3/common/envs/bit_flipping_env.py
index 3ea0c7bb0..e762d95ff 100644
--- a/stable_baselines3/common/envs/bit_flipping_env.py
+++ b/stable_baselines3/common/envs/bit_flipping_env.py
@@ -78,11 +78,11 @@ def convert_if_needed(self, state: np.ndarray) -> Union[int, np.ndarray]:
         if self.discrete_obs_space:
             # The internal state is the binary representation of the
             # observed one
-            return int(sum(state[i] * 2**i for i in range(len(state))))
+            return int(sum(int(state[i]) * 2**i for i in range(len(state))))
 
         if self.image_obs_space:
             size = np.prod(self.image_shape)
-            image = np.concatenate((state * 255, np.zeros(size - len(state), dtype=np.uint8)))
+            image = np.concatenate((state.astype(np.uint8) * 255, np.zeros(size - len(state), dtype=np.uint8)))
             return image.reshape(self.image_shape).astype(np.uint8)
         return state
 
diff --git a/stable_baselines3/common/utils.py b/stable_baselines3/common/utils.py
index ff83e0e4c..9e91dac47 100644
--- a/stable_baselines3/common/utils.py
+++ b/stable_baselines3/common/utils.py
@@ -483,6 +483,8 @@ def obs_as_tensor(obs: Union[np.ndarray, Dict[str, np.ndarray]], device: th.devi
     if isinstance(obs, np.ndarray):
         return th.as_tensor(obs, device=device)
     elif isinstance(obs, dict):
+        if hasattr(th, "backends") and th.backends.mps.is_built():
+            return {key: th.as_tensor(_obs, dtype=th.float32, device=device) for (key, _obs) in obs.items()}
         return {key: th.as_tensor(_obs, device=device) for (key, _obs) in obs.items()}
     else:
         raise Exception(f"Unrecognized type of observation {type(obs)}")
@@ -523,6 +525,7 @@ def get_available_accelerator() -> str:
     """
     if hasattr(th, "backends") and th.backends.mps.is_built():
         # MacOS Metal GPU
+        th.set_default_dtype(th.float32)
         return "mps"
     elif th.cuda.is_available():
         return "cuda"
diff --git a/stable_baselines3/common/vec_env/vec_normalize.py b/stable_baselines3/common/vec_env/vec_normalize.py
index 391ce342d..cda2370aa 100644
--- a/stable_baselines3/common/vec_env/vec_normalize.py
+++ b/stable_baselines3/common/vec_env/vec_normalize.py
@@ -254,7 +254,7 @@ def normalize_reward(self, reward: np.ndarray) -> np.ndarray:
         """
         if self.norm_reward:
             reward = np.clip(reward / np.sqrt(self.ret_rms.var + self.epsilon), -self.clip_reward, self.clip_reward)
-        return reward
+        return reward.astype(np.float32)
 
     def unnormalize_obs(self, obs: Union[np.ndarray, Dict[str, np.ndarray]]) -> Union[np.ndarray, Dict[str, np.ndarray]]:
         # Avoid modifying by reference the original object
diff --git a/tests/test_spaces.py b/tests/test_spaces.py
index e006c1f96..8e5898f2b 100644
--- a/tests/test_spaces.py
+++ b/tests/test_spaces.py
@@ -4,6 +4,7 @@
 import gymnasium as gym
 import numpy as np
 import pytest
+import torch as th
 from gymnasium import spaces
 from gymnasium.spaces.space import Space
 
@@ -151,6 +152,8 @@ def test_discrete_obs_space(model_class, env):
     ],
 )
 def test_float64_action_space(model_class, obs_space, action_space):
+    if hasattr(th, "backends") and th.backends.mps.is_built():
+        pytest.skip("MPS framework doesn't support float64")
     env = DummyEnv(obs_space, action_space)
     env = gym.wrappers.TimeLimit(env, max_episode_steps=200)
     if isinstance(env.observation_space, spaces.Dict):

From f822ef53fcfaed0837f9f218d9b77a4407869ac0 Mon Sep 17 00:00:00 2001
From: deathcoder <night-devil@hotmail.it>
Date: Tue, 17 Sep 2024 17:40:48 +0200
Subject: [PATCH 2/3] Attempt fix ci: only cast reward from float64 to float32

---
 .../common/vec_env/vec_normalize.py             | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/stable_baselines3/common/vec_env/vec_normalize.py b/stable_baselines3/common/vec_env/vec_normalize.py
index cda2370aa..ca0a98012 100644
--- a/stable_baselines3/common/vec_env/vec_normalize.py
+++ b/stable_baselines3/common/vec_env/vec_normalize.py
@@ -125,6 +125,20 @@ def _sanity_checks(self) -> None:
                 f"not {self.observation_space}"
             )
 
+    @staticmethod
+    def _maybe_cast_reward(reward: np.ndarray) -> np.ndarray:
+        """
+        Cast a ``np.float64`` reward array to ``np.float32``,
+        keep arrays of any other dtype unchanged.
+
+        :param reward: The reward array to (possibly) cast
+        :return: The reward as ``np.float32`` if its dtype was float64,
+            the original array otherwise.
+        """
+        if reward.dtype == np.float64:
+            return reward.astype(np.float32)
+        return reward
+
     def __getstate__(self) -> Dict[str, Any]:
         """
         Gets state for pickling.
@@ -254,7 +268,8 @@ def normalize_reward(self, reward: np.ndarray) -> np.ndarray:
         """
         if self.norm_reward:
             reward = np.clip(reward / np.sqrt(self.ret_rms.var + self.epsilon), -self.clip_reward, self.clip_reward)
-        return reward.astype(np.float32)
+
+        return self._maybe_cast_reward(reward)
 
     def unnormalize_obs(self, obs: Union[np.ndarray, Dict[str, np.ndarray]]) -> Union[np.ndarray, Dict[str, np.ndarray]]:
         # Avoid modifying by reference the original object

From 1ac4a60d52dca76d410f111b445f49c9d2c2a231 Mon Sep 17 00:00:00 2001
From: deathcoder <night-devil@hotmail.it>
Date: Tue, 17 Sep 2024 18:01:39 +0200
Subject: [PATCH 3/3] allow running workflows from ui

---
 .github/workflows/ci.yml | 72 ++++++++++++++++++++--------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b1078cd28..113b9d7bd 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -5,10 +5,10 @@ name: CI
 
 on:
   push:
-    branches: [ master ]
+    branches: [master]
   pull_request:
-    branches: [ master ]
-
+    branches: [master]
+  workflow_dispatch:
 jobs:
   build:
     env:
@@ -23,38 +23,38 @@ jobs:
         python-version: ["3.8", "3.9", "3.10", "3.11"]
 
     steps:
-    - uses: actions/checkout@v3
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        # cpu version of pytorch
-        pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          # cpu version of pytorch
+          pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu
 
-        # Install Atari Roms
-        pip install autorom
-        wget https://gist.githubusercontent.com/jjshoots/61b22aefce4456920ba99f2c36906eda/raw/00046ac3403768bfe45857610a3d333b8e35e026/Roms.tar.gz.b64
-        base64 Roms.tar.gz.b64 --decode &> Roms.tar.gz
-        AutoROM --accept-license --source-file Roms.tar.gz
+          # Install Atari Roms
+          pip install autorom
+          wget https://gist.githubusercontent.com/jjshoots/61b22aefce4456920ba99f2c36906eda/raw/00046ac3403768bfe45857610a3d333b8e35e026/Roms.tar.gz.b64
+          base64 Roms.tar.gz.b64 --decode &> Roms.tar.gz
+          AutoROM --accept-license --source-file Roms.tar.gz
 
-        pip install .[extra_no_roms,tests,docs]
-        # Use headless version
-        pip install opencv-python-headless
-    - name: Lint with ruff
-      run: |
-        make lint
-    - name: Build the doc
-      run: |
-        make doc
-    - name: Check codestyle
-      run: |
-        make check-codestyle
-    - name: Type check
-      run: |
-        make type
-    - name: Test with pytest
-      run: |
-        make pytest
+          pip install .[extra_no_roms,tests,docs]
+          # Use headless version
+          pip install opencv-python-headless
+      - name: Lint with ruff
+        run: |
+          make lint
+      - name: Build the doc
+        run: |
+          make doc
+      - name: Check codestyle
+        run: |
+          make check-codestyle
+      - name: Type check
+        run: |
+          make type
+      - name: Test with pytest
+        run: |
+          make pytest