|
16 | 16 | "max_decel": 1, |
17 | 17 | # whether we use an obs space that contains adjacent lane info or just the lead obs |
18 | 18 | "lead_obs": True, |
| 19 | + # whether the reward is computed from nearby (local) vehicles rather than a global, network-wide metric |
| 20 | + "local_reward": True |
19 | 21 | } |
20 | 22 |
|
21 | 23 |
|
@@ -137,35 +139,47 @@ def compute_reward(self, rl_actions, **kwargs): |
137 | 139 | return {} |
138 | 140 |
|
139 | 141 | rewards = {} |
140 | | - for rl_id in self.k.vehicle.get_rl_ids(): |
141 | | - if self.env_params.evaluate: |
142 | | - # reward is speed of vehicle if we are in evaluation mode |
143 | | - reward = self.k.vehicle.get_speed(rl_id) |
144 | | - elif kwargs['fail']: |
145 | | - # reward is 0 if a collision occurred |
146 | | - reward = 0 |
147 | | - else: |
148 | | - # reward high system-level velocities |
149 | | - cost1 = average_velocity(self, fail=kwargs['fail']) |
150 | | - |
151 | | - # penalize small time headways |
152 | | - cost2 = 0 |
153 | | - t_min = 1 # smallest acceptable time headway |
154 | | - |
155 | | - lead_id = self.k.vehicle.get_leader(rl_id) |
156 | | - if lead_id not in ["", None] \ |
157 | | - and self.k.vehicle.get_speed(rl_id) > 0: |
158 | | - t_headway = max( |
159 | | - self.k.vehicle.get_headway(rl_id) / |
160 | | - self.k.vehicle.get_speed(rl_id), 0) |
161 | | - cost2 += min((t_headway - t_min) / t_min, 0) |
162 | | - |
163 | | - # weights for cost1, cost2, and cost3, respectively |
164 | | - eta1, eta2 = 1.00, 0.10 |
165 | | - |
166 | | - reward = max(eta1 * cost1 + eta2 * cost2, 0) |
167 | | - |
168 | | - rewards[rl_id] = reward |
| 142 | + if self.env_params.additional_params["local_reward"]: |
| 143 | + for rl_id in self.k.vehicle.get_rl_ids(): |
| 144 | + rewards[rl_id] = 0 |
| 145 | + speeds = [] |
| 146 | + follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(rl_id)) |
| 147 | + speeds.extend([speed for speed in follow_speed if speed >= 0]) |
| 148 | + if self.k.vehicle.get_speed(rl_id) >= 0: |
| 149 | + speeds.append(self.k.vehicle.get_speed(rl_id)) |
| 150 | + if len(speeds) > 0: |
| 151 | + # rescale the mean speed so the Q-function can estimate the value quickly |
| 152 | + rewards[rl_id] = np.mean(speeds) / 500.0 |
| 153 | + else: |
| 154 | + for rl_id in self.k.vehicle.get_rl_ids(): |
| 155 | + if self.env_params.evaluate: |
| 156 | + # reward is speed of vehicle if we are in evaluation mode |
| 157 | + reward = self.k.vehicle.get_speed(rl_id) |
| 158 | + elif kwargs['fail']: |
| 159 | + # reward is 0 if a collision occurred |
| 160 | + reward = 0 |
| 161 | + else: |
| 162 | + # reward high system-level velocities |
| 163 | + cost1 = average_velocity(self, fail=kwargs['fail']) |
| 164 | + |
| 165 | + # penalize small time headways |
| 166 | + cost2 = 0 |
| 167 | + t_min = 1 # smallest acceptable time headway |
| 168 | + |
| 169 | + lead_id = self.k.vehicle.get_leader(rl_id) |
| 170 | + if lead_id not in ["", None] \ |
| 171 | + and self.k.vehicle.get_speed(rl_id) > 0: |
| 172 | + t_headway = max( |
| 173 | + self.k.vehicle.get_headway(rl_id) / |
| 174 | + self.k.vehicle.get_speed(rl_id), 0) |
| 175 | + cost2 += min((t_headway - t_min) / t_min, 0) |
| 176 | + |
| 177 | + # weights for cost1 and cost2, respectively |
| 178 | + eta1, eta2 = 1.00, 0.10 |
| 179 | + |
| 180 | + reward = max(eta1 * cost1 + eta2 * cost2, 0) |
| 181 | + |
| 182 | + rewards[rl_id] = reward |
169 | 183 | return rewards |
170 | 184 |
|
171 | 185 | def additional_command(self): |
|
0 commit comments