Commit

Documentation and formatting
remove print
FelipeMartins96 authored and goncamateus committed Dec 16, 2020
1 parent 13d499d commit 2a47c08
Showing 3 changed files with 111 additions and 90 deletions.
166 changes: 97 additions & 69 deletions envs/rc_gym/vss/env_3v3/vss_gym_3v3.py
@@ -9,42 +9,49 @@


class VSS3v3Env(VSSBaseEnv):
"""
Description:
This environment controls a single robot soccer in VSS League 3v3 match
Observation:
Type: Box(53)
Num Observation normalized
0 Ball X
1 Ball Y
2 Ball Vx
3 Ball Vy
x id i Blue Robot target_x
x id i Blue Robot target_y
x id i Blue Robot X
x id i Blue Robot Y
x id i Blue Robot sin(theta)
x id i Blue Robot cos(theta)
x id i Blue Robot Vx
x id i Blue Robot Vy
x id i Blue Robot v_theta
x id i Yellow Robot X
x id i Yellow Robot Y
x id i Yellow Robot sin(theta)
x id i Yellow Robot cos(theta)
x id i Yellow Robot Vx
x id i Yellow Robot Vy
x id i Yellow Robot v_theta
Actions:
Type: Box(2, )
Num Action
0 id 0 Blue Angular Speed (%)
1 id 0 Blue Linear Speed (%)
Reward:
Starting State:
TODO
Episode Termination:
Match time or goal
"""This environment controls a single robot in a VSS soccer League 3v3 match
Description:
Observation:
Type: Box(53)
Normalized Bounds to [0, 1]
Num Observation normalized
0 Ball X
1 Ball Y
2 Ball Vx
3 Ball Vy
5 + (9 * i) id i Blue Robot target_x
6 + (9 * i) id i Blue Robot target_y
7 + (9 * i) id i Blue Robot X
8 + (9 * i) id i Blue Robot Y
9 + (9 * i) id i Blue Robot sin(theta)
10 + (9 * i) id i Blue Robot cos(theta)
11 + (9 * i) id i Blue Robot Vx
12 + (9 * i) id i Blue Robot Vy
13 + (9 * i) id i Blue Robot v_theta
32 + (7 * i) id i Yellow Robot X
33 + (7 * i) id i Yellow Robot Y
34 + (7 * i) id i Yellow Robot sin(theta)
35 + (7 * i) id i Yellow Robot cos(theta)
36 + (7 * i) id i Yellow Robot Vx
37 + (7 * i) id i Yellow Robot Vy
38 + (7 * i) id i Yellow Robot v_theta
Actions:
Type: Box(2, )
Num Action
0 id 0 Blue Angular Speed (%)
1 id 0 Blue Linear Speed (%)
Reward:
Sum of Rewards:
Goal
Ball Gradient
Move to Ball
Energy Penalty
Starting State:
Randomized Robots and Ball initial Position
Episode Termination:
5 minutes match time
"""

def __init__(self):
@@ -54,19 +61,33 @@ def __init__(self):
self.action_space = gym.spaces.Box(
low=-1, high=1, shape=(2, ), dtype=np.float32)
self.observation_space = gym.spaces.Box(
low=-0, high=1, shape=(53, ), dtype=np.float32)
low=0, high=1, shape=(53, ), dtype=np.float32)

# Initialize Class Atributes
self.matches_played = 0
self.previous_ball_potential = None
self.actions: Dict = None
self.reward_shaping_total = None
self.summary_writer = None

print('Environment initialized')

def reset(self):
self.actions = None
self.reward_shaping_total = None

return super().reset()

def _frame_to_observations(self):

observation = []

width = 1.3/2.0
lenght = (1.5/2.0) + 0.1
half_width = self.field_params['field_width'] / 2.0
half_lenght = (self.field_params['field_length'] / 2.0)\
+ self.field_params['goal_depth']
observation.append(1 - (self.frame.time / 300))
observation.append(normX(lenght + self.frame.ball.x))
observation.append(normX(width - self.frame.ball.y))
observation.append(normX(half_lenght + self.frame.ball.x))
observation.append(normX(half_width - self.frame.ball.y))
observation.append(normVx(self.frame.ball.v_x))
observation.append(normVx(-self.frame.ball.v_y))

@@ -86,10 +107,11 @@ def _frame_to_observations(self):
else:
target_x = self.frame.robots_blue[i].x
target_y = self.frame.robots_blue[i].y
observation.append(normX(lenght + target_x))
observation.append(normX(width - target_y))
observation.append(normX(lenght + self.frame.robots_blue[i].x))
observation.append(normX(width - self.frame.robots_blue[i].y))
observation.append(normX(half_lenght + target_x))
observation.append(normX(half_width - target_y))
observation.append(
normX(half_lenght + self.frame.robots_blue[i].x))
observation.append(normX(half_width - self.frame.robots_blue[i].y))
observation.append(
np.sin(np.deg2rad(-self.frame.robots_blue[i].theta)))
observation.append(
@@ -99,8 +121,10 @@
observation.append(normVt(self.frame.robots_blue[i].v_theta))

for i in range(self.n_robots_yellow):
observation.append(normX(lenght + self.frame.robots_yellow[i].x))
observation.append(normX(width - self.frame.robots_yellow[i].y))
observation.append(
normX(half_lenght + self.frame.robots_yellow[i].x))
observation.append(
normX(half_width - self.frame.robots_yellow[i].y))
observation.append(
np.sin(np.deg2rad(-self.frame.robots_yellow[i].theta)))
observation.append(
@@ -122,15 +146,13 @@ def _get_commands(self, actions):

# Send random commands to the other robots
for i in range(1, 3):
# actions = self.action_space.sample()
actions = [0., 0.]
actions = self.action_space.sample()
self.actions[i] = actions
v_wheel1, v_wheel2 = self._actions_to_v_wheels(actions)
commands.append(Robot(yellow=False, id=i, v_wheel1=v_wheel1,
v_wheel2=v_wheel2))
for i in range(3):
# actions = self.action_space.sample()
actions = [0., 0.]
actions = self.action_space.sample()
v_wheel1, v_wheel2 = self._actions_to_v_wheels(actions)
commands.append(Robot(yellow=True, id=i, v_wheel1=v_wheel1,
v_wheel2=v_wheel2))
@@ -163,20 +185,22 @@ def _calculate_reward_and_done(self):
goal = True
else:
# Calculate ball potential
width = 1.3/2.0
lenght = (1.5/2.0) + 0.1
dx_d = 0 - (lenght + self.frame.ball.x) * \
half_width = self.field_params['field_width'] / 2.0
half_lenght = (self.field_params['field_length'] / 2.0)\
+ self.field_params['goal_depth']

dx_d = 0 - (half_lenght + self.frame.ball.x) * \
100 # distance to defence
dx_a = 170.0 - (lenght + self.frame.ball.x) * \
dx_a = 170.0 - (half_lenght + self.frame.ball.x) * \
100 # distance to attack
dy = 65.0 - (width - self.frame.ball.y) * 100
dy = 65.0 - (half_width - self.frame.ball.y) * 100
ball_potential = ((-math.sqrt(dx_a ** 2 + 2 * dy ** 2)
+ math.sqrt(dx_d ** 2 + 2 * dy ** 2)) / 170 - 1) / 2

if self.last_frame is not None:
if self.previous_ball_potential is not None:
grad_ball_potential = np.clip(((ball_potential
- self.previous_ball_potential) * 3/ self.time_step),
- self.previous_ball_potential) * 3 / self.time_step),
-1.0, 1.0)
else:
grad_ball_potential = 0
Expand Down Expand Up @@ -205,14 +229,12 @@ def _calculate_reward_and_done(self):
reward = w_move * move_reward + \
w_ball_grad * grad_ball_potential + \
w_energy * energy_penalty

self.reward_shaping_total['move'] += w_move * move_reward
self.reward_shaping_total['ball_grad'] +=\
w_ball_grad * grad_ball_potential
self.reward_shaping_total['energy'] += w_energy * energy_penalty


self.last_frame = self.frame
self.reward_shaping_total['energy'] += w_energy * \
energy_penalty

if goal:
initial_pos_frame: Frame = self._get_initial_positions_frame()
@@ -223,12 +245,18 @@ def _calculate_reward_and_done(self):
done = self.steps * self.time_step >= 300

if done and self.summary_writer != None:
self.summary_writer.add_scalar("rw/goal_score", self.reward_shaping_total['goal_score'], self.matches_played)
self.summary_writer.add_scalar("rw/move", self.reward_shaping_total['move'], self.matches_played)
self.summary_writer.add_scalar("rw/ball_grad", self.reward_shaping_total['ball_grad'], self.matches_played)
self.summary_writer.add_scalar("rw/energy", self.reward_shaping_total['energy'], self.matches_played)
self.summary_writer.add_scalar("rw/goals_blue", self.reward_shaping_total['goals_blue'], self.matches_played)
self.summary_writer.add_scalar("rw/goals_yellow", self.reward_shaping_total['goals_yellow'], self.matches_played)
self.summary_writer.add_scalar(
"rw/goal_score", self.reward_shaping_total['goal_score'], self.matches_played)
self.summary_writer.add_scalar(
"rw/move", self.reward_shaping_total['move'], self.matches_played)
self.summary_writer.add_scalar(
"rw/ball_grad", self.reward_shaping_total['ball_grad'], self.matches_played)
self.summary_writer.add_scalar(
"rw/energy", self.reward_shaping_total['energy'], self.matches_played)
self.summary_writer.add_scalar(
"rw/goals_blue", self.reward_shaping_total['goals_blue'], self.matches_played)
self.summary_writer.add_scalar(
"rw/goals_yellow", self.reward_shaping_total['goals_yellow'], self.matches_played)

return reward, done

@@ -285,6 +313,6 @@ def _actions_to_v_wheels(self, actions):

def set_writer(self, writer):
self.summary_writer = writer

def set_matches_played(self, matches):
self.matches_played = matches
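
For reference, a minimal usage sketch of the updated environment, based on the new docstring and _frame_to_observations above. The import path is inferred from the file layout shown in this commit, and the gym-style (obs, reward, done, info) return of step() is an assumption, since the base step() return is not visible in this diff. Note that the observation code prepends a normalized match-time value before the four ball values, so in practice ball X sits at index 1 and the blue-robot offsets 5 + 9*i follow from that.

    # Sketch only -- not part of this commit. Assumes rc_gym is installed and
    # that step() follows the usual gym return convention.
    from rc_gym.vss.env_3v3.vss_gym_3v3 import VSS3v3Env  # path assumed from this repo layout

    env = VSS3v3Env()
    obs = env.reset()
    assert env.observation_space.shape == (53,)

    # Layout implied by _frame_to_observations():
    #   0           normalized time remaining, 1 - t/300
    #   1..4        ball x, y, vx, vy
    #   5 + 9*i     blue robot i (i = 0..2): target_x, target_y, x, y,
    #               sin(theta), cos(theta), vx, vy, v_theta
    #   32 + 7*i    yellow robot i (i = 0..2): x, y, sin(theta), cos(theta),
    #               vx, vy, v_theta
    ball_xy = obs[1:3]
    blue_robot_0 = obs[5:14]

    done = False
    episode_reward = 0.0
    while not done:
        action = env.action_space.sample()        # [angular speed %, linear speed %] in [-1, 1]
        obs, reward, done, _ = env.step(action)   # return signature assumed
        episode_reward += reward

    print(episode_reward)
    env.close()
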
25 changes: 12 additions & 13 deletions envs/rc_gym/vss/vss_gym_base.py
@@ -6,7 +6,7 @@


import time
from typing import Dict, List
from typing import Dict, List, Optional

import gym
import numpy as np
@@ -34,10 +34,6 @@ def __init__(self, field_type: int,
self.view = None
self.steps = 0
self.sent_commands = None
self.actions: Dict = None
self.previous_ball_potential = None
self.reward_shaping_total = None
self.summary_writer = None

def step(self, action):
self.steps += 1
@@ -49,7 +45,9 @@ def step(self, action):
self.sent_commands = commands

# Get Frame from simulator
self.last_frame = self.frame
self.frame = self.simulator.get_frame()

# Calculate environment observation, reward and done condition
observation = self._frame_to_observations()
reward, done = self._calculate_reward_and_done()
@@ -60,10 +58,11 @@ def reset(self):
self.steps = 0
self.last_frame = None
self.sent_commands = None
self.actions = None
self.reward_shaping_total = None

# Close render window
del(self.view)
self.view = None

initial_pos_frame: Frame = self._get_initial_positions_frame()
self.simulator.reset(initial_pos_frame)

@@ -72,7 +71,7 @@ def reset(self):

return self._frame_to_observations()

def render(self, mode='human') -> None:
def render(self, mode: Optional = None) -> None:
'''
Renders the game depending on
ball's and players' positions.
@@ -91,8 +90,11 @@ def render(self, mode='human') -> None:
self.n_robots_blue, self.n_robots_yellow, self.field_params)

self.view.render_frame(self.frame)
# if mode == 'human':
# time.sleep(0.01)
if mode == 'human':
time.sleep(0.01)

def close(self):
self.simulator.stop()

def _get_commands(self, action):
'''returns a list of commands of type List[Robot] from type action_space action'''
@@ -109,6 +111,3 @@ def _calculate_reward_and_done(self):
def _get_initial_positions_frame(self) -> Frame:
'''returns frame with robots initial positions'''
raise NotImplementedError

def close(self):
self.simulator.stop()
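
The base class above defines the template that concrete environments fill in: step() builds commands with _get_commands, reads a new frame from the simulator, then calls _frame_to_observations and _calculate_reward_and_done, while reset() repositions everything via _get_initial_positions_frame. A hedged skeleton of a subclass following that contract is shown below; the import paths and the constructor arguments beyond field_type are assumptions, since they are not visible in this diff.

    # Illustrative skeleton of the contract VSSBaseEnv expects from subclasses.
    import gym
    import numpy as np
    from typing import List

    from rc_gym.Entities import Frame, Robot          # assumed location of these types
    from rc_gym.vss.vss_gym_base import VSSBaseEnv    # path assumed from this repo layout


    class MyVSSEnv(VSSBaseEnv):
        def __init__(self):
            super().__init__(field_type=0, n_robots_blue=3,
                             n_robots_yellow=3, time_step=0.032)  # arguments assumed
            self.action_space = gym.spaces.Box(
                low=-1, high=1, shape=(2, ), dtype=np.float32)
            self.observation_space = gym.spaces.Box(
                low=0, high=1, shape=(53, ), dtype=np.float32)

        def _frame_to_observations(self):
            # Build the observation vector from self.frame
            raise NotImplementedError

        def _get_commands(self, action) -> List[Robot]:
            # Convert the agent action (plus any scripted opponents) into Robot commands
            raise NotImplementedError

        def _calculate_reward_and_done(self):
            # Compute (reward, done) from self.frame and self.last_frame
            raise NotImplementedError

        def _get_initial_positions_frame(self) -> Frame:
            # Return a Frame with the initial ball and robot poses
            raise NotImplementedError
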
10 changes: 2 additions & 8 deletions envs/test.py
@@ -73,8 +73,7 @@ def print_with_description(state):

# env.reset()
# Run for 10 episode and print reward at the end
for i in range(1):
init = time.perf_counter()
for i in range(10):
done = False
next_state = env.reset()
# print_with_description(next_state)
@@ -86,11 +85,6 @@ def print_with_description(state):
epi_rew += reward
env.render()
# print_with_description(next_state)
# print(np.sqrt((env.frame.robots_blue[0].v_x * env.frame.robots_blue[0].v_x) + (env.frame.robots_blue[0].v_y * env.frame.robots_blue[0].v_y)))
# print(env.frame.robots_blue[0].v_theta)

end = time.perf_counter()

print(300 / (end - init))
print(epi_rew)

env.close()
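
The 3v3 environment above accumulates reward components per match and writes them out through the attached summary writer when an episode ends. A sketch of how a training or evaluation script might wire this up, assuming torch.utils.tensorboard provides the writer (the commit itself does not show which writer class is intended; any object exposing add_scalar(tag, value, step) would work with the calls in the diff):

    # Sketch: attach a writer so the env logs the rw/* scalars once per finished match.
    # SummaryWriter from torch.utils.tensorboard is an assumption.
    from torch.utils.tensorboard import SummaryWriter
    from rc_gym.vss.env_3v3.vss_gym_3v3 import VSS3v3Env  # path assumed from this repo layout

    env = VSS3v3Env()
    writer = SummaryWriter(log_dir='runs/vss_3v3')
    env.set_writer(writer)

    for match in range(3):
        env.set_matches_played(match)
        obs, done = env.reset(), False
        while not done:
            obs, reward, done, _ = env.step(env.action_space.sample())  # return assumed
        # rw/goal_score, rw/move, rw/ball_grad, rw/energy, rw/goals_blue and
        # rw/goals_yellow are written here with matches_played as the step.

    writer.close()
    env.close()
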
