@@ -187,8 +187,8 @@ def __init__(
187187
188188 self .target_pos = target_pos
189189 if self .target_pos is None :
190- self .target_pos_min = [- 0.05 , - 0.25 , 0.8 ]
191- self .target_pos_max = [0.15 , 0.25 , 1.2 ]
190+ self .target_pos_min = [- 0.05 , - 0.25 , 0.73 ]
191+ self .target_pos_max = [0.15 , 0.25 , 0.95 ]
192192
193193 self .base_types = base_types
194194
@@ -255,6 +255,12 @@ def reward(self, action=None):
255255 # reaching_reward = -((2 * dist) ** 2)
256256 reward += reaching_reward
257257
258+ # penalize for non-zero joint velocities once close to target
259+ if target_to_gripper_dist < 0.03 :
260+ qvels = np .linalg .norm (self .robots [0 ]._joint_velocities )
261+ static_reward = 1 - np .tanh (10.0 * qvels )
262+ reward += static_reward
263+
258264 return reward
259265
260266 def _load_model (self ):
@@ -286,9 +292,9 @@ def _load_model(self):
286292
287293 # Set target location without physical objects
288294 if self .target_pos is None :
289- target_pos = np .random .uniform (low = self .target_pos_min , high = self .target_pos_max )
295+ self . target_pos = np .random .uniform (low = self .target_pos_min , high = self .target_pos_max )
290296 else :
291- target_pos = self .target_pos
297+ self . target_pos = self .target_pos
292298
293299 self .target = BallObject (
294300 name = "target" ,
@@ -297,7 +303,7 @@ def _load_model(self):
297303 obj_type = 'visual' ,
298304 joints = None
299305 )
300- self .target .get_obj ().set ("pos" , " " .join ([str (num ) for num in target_pos ]))
306+ self .target .get_obj ().set ("pos" , " " .join ([str (num ) for num in self . target_pos ]))
301307
302308 # task includes arena, robot, and objects of interest
303309 self .model = ManipulationTask (
@@ -410,6 +416,9 @@ def _reset_internal(self):
410416 """
411417 Resets simulation internal configurations.
412418 """
419+ self .target_pos = np .random .uniform (low = self .target_pos_min , high = self .target_pos_max )
420+ self .target .get_obj ().set ("pos" , " " .join ([str (num ) for num in self .target_pos ]))
421+
413422 super ()._reset_internal ()
414423
415424
@@ -430,15 +439,16 @@ def _check_success(self):
430439 Check if gripper has reached target
431440
432441 Returns:
433- bool: True if target has been reached
442+ bool: True if target has been reached, and robot is static
434443 """
435444
436445 target_to_gripper_dist = self ._gripper_to_target (
437446 gripper = self .robots [0 ].gripper , target = self .target .root_body , target_type = "body" , return_distance = True
438447 )
448+ is_robot_static = np .linalg .norm (self .robots [0 ]._joint_velocities ) < 0.01
439449
440- # gripper is within a small distance to the target
441- return target_to_gripper_dist < 0.03
450+ # gripper is within a small distance to the target and robot is static
451+ return target_to_gripper_dist < 0.03 and is_robot_static
442452
443453 def reset_target (self ):
444454
0 commit comments