
Commit 04cda3b

feat: added A2C algo train and evaluate functions
1 parent 8129229 commit 04cda3b

File tree

2 files changed: +90 −0 lines changed

A2Ctrain.py

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
### requires ray version 2.6.3 to run

import ray
from ray import tune, air
from ray.tune.registry import register_env
from env_creator import qsimpy_env_creator
from ray.rllib.algorithms.a2c import A2CConfig  # Import A2CConfig
import os

if __name__ == "__main__":
    register_env("QSimPyEnv", qsimpy_env_creator)

    config = (
        A2CConfig()
        .framework(framework="torch")
        .environment(
            env="QSimPyEnv",
            env_config={
                "obs_filter": "rescale_-1_1",
                "reward_filter": None,
                "dataset": "qdataset/qsimpyds_1000_sub_36.csv",
            },
        )
        .training(gamma=0.9, lr=0.01)
        .rollouts(num_rollout_workers=4)
    )

    # Stop when either limit is reached.
    stopping_criteria = {
        "training_iteration": 1000,
        "timesteps_total": 100000,
    }

    # Get the absolute path of the current directory
    current_directory = os.getcwd()

    # Append the "results" folder to the current directory path
    result_directory = os.path.join(current_directory, "results")
    storage_path = f"file://{result_directory}"

    results = tune.Tuner(
        "A2C",  # Specify the A2C algorithm
        run_config=air.RunConfig(
            stop=stopping_criteria,
            # Save a checkpoint every 100 iterations.
            checkpoint_config=air.CheckpointConfig(checkpoint_frequency=100),
            storage_path=storage_path,
            name="A2C_QCE_1000",
        ),
        param_space=config.to_dict(),
    ).fit()
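
Tuner.fit() returns a ResultGrid, so the best checkpoint can be pulled out programmatically instead of hand-picking a path. A minimal sketch, assuming the run logged the standard RLlib metric "episode_reward_mean" (not shown in the commit):

# Sketch: inspect the ResultGrid returned by Tuner.fit().
# Assumes the standard RLlib metric "episode_reward_mean" was logged.
best_result = results.get_best_result(metric="episode_reward_mean", mode="max")
print("Best mean episode reward:", best_result.metrics["episode_reward_mean"])
print("Best checkpoint:", best_result.checkpoint)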

evaluateA2C.py

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
from env_creator import qsimpy_env_creator
from ray.tune.registry import register_env
from ray.rllib.algorithms import Algorithm

register_env("QSimPyEnv", qsimpy_env_creator)

env = qsimpy_env_creator(
    env_config={
        "obs_filter": "rescale_-1_1",
        "reward_filter": None,
        "dataset": "qdataset/qsimpyds_1000_sub_36.csv",
    }
)

# Raw string so the Windows backslashes are not treated as escape sequences.
checkpoint_path = r"results\A2C_QCE_1000\A2C_QSimPyEnv_3fc51_00000_0_2024-08-11_19-08-18\checkpoint_000100"

model = Algorithm.from_checkpoint(checkpoint_path)

num_ep = 50

for ep in range(num_ep):
    obs = env.reset()
    finished = False
    ep_reward = 0

    while not finished:
        # env.reset() may return an (obs, info) tuple; keep only the observation.
        formatted_obs = obs if not isinstance(obs, tuple) else obs[0]
        action = model.compute_single_action(formatted_obs, explore=False)
        obs, reward, finished, _, info = env.step(action)
        ep_reward += reward

        if finished:
            print(f"Episode {ep} finished with reward {ep_reward} and info {info}")
            break

env.close()
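
The script above prints per-episode rewards but discards them afterwards. A small sketch of how the evaluation loop could aggregate them into a summary statistic; the list eval_rewards is a hypothetical addition, not part of the commit:

# Sketch: collect per-episode rewards and report the mean over all
# evaluation episodes ("eval_rewards" is introduced here for illustration).
eval_rewards = []

for ep in range(num_ep):
    obs = env.reset()
    finished = False
    ep_reward = 0
    while not finished:
        formatted_obs = obs if not isinstance(obs, tuple) else obs[0]
        action = model.compute_single_action(formatted_obs, explore=False)
        obs, reward, finished, _, info = env.step(action)
        ep_reward += reward
    eval_rewards.append(ep_reward)

print(f"Mean reward over {num_ep} episodes: {sum(eval_rewards) / num_ep:.3f}")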
