From 7b227c36574e82ad26c5ade398cf4b9c7371eeed Mon Sep 17 00:00:00 2001 From: Trinity Chung Date: Sun, 24 May 2026 16:26:21 -0400 Subject: [PATCH 1/7] rebase update --- examples/sensors/tactile_sandbox.py | 100 ++- genesis/engine/sensors/kinematic_tactile.py | 533 +++++++++++---- genesis/engine/sensors/point_cloud_tactile.py | 499 +++++++------- genesis/engine/sensors/probe.py | 369 +++++++++- .../engine/sensors/surface_distance_probe.py | 49 +- genesis/engine/sensors/tactile_shared.py | 296 ++++++++ genesis/options/sensors/options.py | 60 +- genesis/options/sensors/tactile.py | 188 ++++- genesis/typing.py | 8 + tests/test_sensors.py | 647 +++++++++++++++++- 10 files changed, 2224 insertions(+), 525 deletions(-) create mode 100644 genesis/engine/sensors/tactile_shared.py diff --git a/examples/sensors/tactile_sandbox.py b/examples/sensors/tactile_sandbox.py index 3cc52bbb02..20f2aa8c7e 100644 --- a/examples/sensors/tactile_sandbox.py +++ b/examples/sensors/tactile_sandbox.py @@ -1,6 +1,6 @@ """ Interactive demo of tactile sensors on a fixed taxel pad (box or dome) with controllable objects. -Sensor types: ContactDepthProbe, ElastomerTaxel, KinematicTaxel, ProximityTaxel. +Sensor types: ContactDepthProbe, ContactProbe, ElastomerTaxel, KinematicTaxel, ProximityTaxel. Note that the sensor readings here have not been calibrated to any units, and is purely for visualization purposes. 
""" @@ -24,9 +24,9 @@ from genesis.engine.entities.rigid_entity import RigidEntity from genesis.engine.sensors.base_sensor import Sensor -KEY_DPOS = 0.005 +KEY_DPOS = 0.001 FORCE_SCALE = 100.0 -ROT_FORCE_SCALE = 200.0 +ROT_FORCE_SCALE = 100.0 GRID_SIZE = 20 # 20x20 taxels for square PROBE_RADIUS = 0.004 @@ -48,6 +48,7 @@ def _add_tactile_sensor( probe_local_pos: np.ndarray, probe_normal: tuple[float, float, float] | np.ndarray, track_link_idx: tuple[int, ...], + noise: bool, ) -> "Sensor": common = dict( entity_idx=entity.idx, @@ -55,6 +56,15 @@ def _add_tactile_sensor( draw_debug=True, probe_radius=PROBE_RADIUS, ) + if noise: + # Sensor imperfections shared by every tactile sensor type: viscoelastic hysteresis on the measured + # branch, a noised sensing radius, and a per-taxel measured-branch depth gain. + common.update( + hysteresis_strength=0.5, # viscoelastic overshoot fraction + hysteresis_tau=0.1, # viscoelastic relaxation time constant (seconds) + probe_radius_noise=0.001, # additive sensing-radius noise (meters) + probe_gain=1.5, # per-taxel measured-branch depth gain + ) if sensor_type == "elastomer": return scene.add_sensor( gs.sensors.ElastomerTaxel( @@ -63,7 +73,8 @@ def _add_tactile_sensor( track_link_idx=track_link_idx, n_sample_points=2000, dilate_scale=1.0, - shear_scale=100.0, + shear_scale=2.0, + normal_exponent=1.0, **common, ) ) @@ -76,6 +87,17 @@ def _add_tactile_sensor( **common, ) ) + if sensor_type == "contact": + # Schmitt-trigger thresholds (contact depth in meters): a taxel latches on above contact_threshold and + # only releases once the depth drops back below the lower release_threshold. 
+ return scene.add_sensor( + gs.sensors.ContactProbe( + probe_local_pos=probe_local_pos, + contact_threshold=0.004, + release_threshold=0.002, + **common, + ) + ) if sensor_type == "kinematic": return scene.add_sensor( gs.sensors.KinematicTaxel( @@ -83,25 +105,26 @@ def _add_tactile_sensor( probe_local_normal=probe_normal, normal_stiffness=500.0, normal_damping=1.0, - shear_scalar=5.0, - twist_scalar=5.0, + shear_scalar=4.0, + twist_scalar=4.0, normal_exponent=1.5, **common, ) ) - common["probe_radius"] = PROBE_RADIUS * 2 - common["debug_point_cloud_radius"] = 0.001 + common["probe_radius"] = PROBE_RADIUS * 5 if sensor_type == "proximity": return scene.add_sensor( gs.sensors.ProximityTaxel( probe_local_pos=probe_local_pos, track_link_idx=track_link_idx, n_sample_points=4000, - stiffness=200.0, - shear_coupling=100.0, + stiffness=40.0, + shear_coupling=10.0, probe_local_normal=probe_normal, - probe_radius_noise=0.0001, + debug_point_cloud_radius=0.0005, + debug_probe_color=(0.2, 0.6, 1.0), + debug_contact_color=(1.0, 0.2, 0.2), **common, ) ) @@ -111,7 +134,6 @@ def _add_tactile_sensor( def _plot_tactile_sensor( scene: gs.Scene, sensor_type: str, - labels: tuple[str, ...], sensors: "tuple[Sensor, ...]", n_envs: int = 1, plot_normal: tuple[float, float, float] = (0.0, 0.0, -1.0), @@ -122,24 +144,24 @@ def _plot_tactile_sensor( if sensor_type == "elastomer": for env_idx in range(n_envs): - for label, sensor in zip(labels, sensors): + for sensor in sensors: scene.start_recording( lambda s=sensor, i=env_idx: s.read()[i], gs.recorders.MPLVectorFieldPlot( - title=f"({label} {OBJ_PER_ENV_LABELS[env_idx]}) ElastomerTaxel marker displacements", + title=f"({OBJ_PER_ENV_LABELS[env_idx]}) ElastomerTaxel marker displacements", positions=sensor.probe_local_pos.reshape(-1, 3), normal=plot_normal, - scale_factor=1.0, - max_magnitude=0.01, + scale_factor=0.1, + max_magnitude=0.1, ), ) elif sensor_type == "kinematic": for env_idx in range(n_envs): - for label, sensor in zip(labels, 
sensors): + for sensor in sensors: scene.start_recording( lambda s=sensor, i=env_idx: s.read().force[i], gs.recorders.MPLVectorFieldPlot( - title=f"({label} {OBJ_PER_ENV_LABELS[env_idx]}) KinematicTaxel force", + title=f"({OBJ_PER_ENV_LABELS[env_idx]}) KinematicTaxel force", positions=sensor.probe_local_pos.reshape(-1, 3), normal=plot_normal, scale_factor=0.01, @@ -148,14 +170,14 @@ def _plot_tactile_sensor( ) elif sensor_type == "proximity": for env_idx in range(n_envs): - for label, sensor in zip(labels, sensors): + for sensor in sensors: scene.start_recording( lambda s=sensor, i=env_idx: s.read().force[i], gs.recorders.MPLVectorFieldPlot( - title=f"({label} {OBJ_PER_ENV_LABELS[env_idx]}) ProximityTaxel force", + title=f"({OBJ_PER_ENV_LABELS[env_idx]}) ProximityTaxel force", positions=sensor.probe_local_pos.reshape(-1, 3), normal=plot_normal, - scale_factor=0.5, + scale_factor=0.1, max_magnitude=1.0, ), ) @@ -165,12 +187,22 @@ def _plot_tactile_sensor( lambda i=env_idx: tuple(sensor.read()[i].max() for sensor in sensors), gs.recorders.MPLLinePlot( title=f"ContactDepthProbe max depth ({OBJ_PER_ENV_LABELS[env_idx]})", - labels=labels, x_label="step", y_label="depth", history_length=200, ), ) + elif sensor_type == "contact": + for env_idx in range(n_envs): + scene.start_recording( + lambda i=env_idx: tuple(sensor.read()[i].sum() for sensor in sensors), + gs.recorders.MPLLinePlot( + title=f"ContactProbe taxels in contact ({OBJ_PER_ENV_LABELS[env_idx]})", + x_label="step", + y_label="# taxels", + history_length=200, + ), + ) def _print_sensor_reading(sensor_type: str, sensor: "Sensor", t: float) -> None: @@ -183,6 +215,10 @@ def _print_sensor_reading(sensor_type: str, sensor: "Sensor", t: float) -> None: max_depth = data.max() if max_depth > gs.EPS: print(f"t={t:.2f}s max depth={max_depth:.4f}") + elif sensor_type == "contact": + n_contact = int(data.sum()) + if n_contact > 0: + print(f"t={t:.2f}s taxels in contact={n_contact}") elif sensor_type == "kinematic": 
magnitude = torch.linalg.norm(data.force, axis=-1).max() if magnitude > gs.EPS: @@ -204,10 +240,15 @@ def main() -> None: parser.add_argument("--dome", action="store_true", help="Change the sensor object to a dome instead of a box") parser.add_argument( "--sensor", - choices=("elastomer", "depth", "kinematic", "proximity"), + choices=("elastomer", "depth", "contact", "kinematic", "proximity"), default="elastomer", help="Type of tactile sensor to use.", ) + parser.add_argument( + "--noise", + action="store_true", + help="Enable sensor imperfections (viscoelastic hysteresis, probe_radius_noise, probe_gain).", + ) args = parser.parse_args() gs.init( @@ -276,17 +317,17 @@ def main() -> None: ny=GRID_SIZE, ) - # Procedural torus written to a temp .obj (avoids checking a 2k-line mesh into the repo). torus_path = os.path.join(tempfile.gettempdir(), "tactile_sandbox_torus.obj") if not os.path.exists(torus_path): - trimesh.creation.torus(major_radius=0.3, minor_radius=0.1).export(torus_path) + trimesh.creation.torus(major_radius=1.0, minor_radius=0.5).export(torus_path) obj = scene.add_entity( morph=[ gs.morphs.Mesh( file=torus_path, - euler=(90.0, 0.0, 0.0), - scale=OBJECT_SIZE, + euler=(0.0, 0.0, 0.0), + scale=OBJECT_SIZE * 2, + convexify=False, ), gs.morphs.Sphere( radius=OBJECT_SIZE / 2, @@ -314,9 +355,10 @@ def main() -> None: probe_local_pos, probe_normal, track_link_idx=(obj.base_link_idx,), + noise=args.noise, ) if args.vis and "PYTEST_VERSION" not in os.environ: - _plot_tactile_sensor(scene, args.sensor, ("",), (sensor,), n_envs=4, plot_normal=probe_normal_axis) + _plot_tactile_sensor(scene, args.sensor, (sensor,), n_envs=4, plot_normal=probe_normal_axis) scene.build(n_envs=4, env_spacing=(SENSOR_OBJ_SIZE * 1.2, SENSOR_OBJ_SIZE * 1.2)) obj_init_pos = tensor_to_array(obj.get_pos()) @@ -382,7 +424,7 @@ def rotate(axis_idx: int, is_negative: bool): print("\n=== Tactile Sensor Sandbox ===") n_taxels = probe_local_pos.reshape(-1, 3).shape[0] layout = f"dome ({GRID_SIZE} 
latitude rings)" if args.dome else f"plane grid {probe_local_pos.shape[:-1]}" - print(f"sensor={args.sensor}; taxels={n_taxels}; {layout}") + print(f"sensor={args.sensor}; taxels={n_taxels}; {layout}; noise={'on' if args.noise else 'off'}") if args.vis and IS_MATPLOTLIB_AVAILABLE: print("Matplotlib live plot enabled when supported.") if args.vis: diff --git a/genesis/engine/sensors/kinematic_tactile.py b/genesis/engine/sensors/kinematic_tactile.py index f4e74e9d2b..cc04228871 100644 --- a/genesis/engine/sensors/kinematic_tactile.py +++ b/genesis/engine/sensors/kinematic_tactile.py @@ -1,5 +1,6 @@ +import math from dataclasses import dataclass -from typing import TYPE_CHECKING, Callable, NamedTuple +from typing import TYPE_CHECKING, Generic, NamedTuple, TypeVar import numpy as np import quadrants as qd @@ -23,6 +24,15 @@ ProbesWithNormalSensorMixin, ProbesWithNormalSensorSharedMetadataT, func_noised_probe_radius, + get_measured_bufs, +) +from .tactile_shared import ( + GridFFTConvMetadataMixin, + ViscoelasticHysteresisMetadataMixin, + ViscoelasticHysteresisMixin, + next_pow2, + normalize_grid_probe_layout, + register_grid_fft_sensor, ) if TYPE_CHECKING: @@ -149,12 +159,66 @@ def _func_query_contact_depth( return max_pen_gt, contact_link_gt, contact_normal_gt, max_pen_m, contact_link_m, contact_normal_m +@qd.func +def _func_kinematic_spring_damper( + i_b: int, + max_penetration: float, + contact_link: int, + contact_normal: qd.types.vector(3), + sensor_link_idx: int, + probe_pos: qd.types.vector(3), + probe_pos_local: qd.types.vector(3), + link_quat: qd.types.vector(4), + normal_stiffness: float, + normal_damping: float, + normal_exponent: float, + shear_scalar: float, + twist_scalar: float, + links_state: array_class.LinksState, +): + """ + Kinematic spring-damper force / torque in the sensor link frame from a single probe's contact query. 
+ + Shared by the GT and measured branches of ``_kernel_kinematic_taxel`` (they differ only in which dual-radius + query result is fed in). Returns ``(force_local, torque_local)``; both zero when ``max_penetration <= 0``. + """ + force_local = qd.Vector.zero(gs.qd_float, 3) + torque_local = qd.Vector.zero(gs.qd_float, 3) + if max_penetration > 0: + contact_normal_local = gu.qd_inv_transform_by_quat(contact_normal, link_quat) + s = qd.pow(max_penetration, normal_exponent) + force_local = contact_normal_local * (normal_stiffness * s) + + if contact_link >= 0: + contact_vel = links_state.cd_vel[contact_link, i_b] + links_state.cd_ang[contact_link, i_b].cross( + probe_pos - links_state.root_COM[contact_link, i_b] + ) + sensor_vel = links_state.cd_vel[sensor_link_idx, i_b] + links_state.cd_ang[sensor_link_idx, i_b].cross( + probe_pos - links_state.root_COM[sensor_link_idx, i_b] + ) + rel_vel_world = contact_vel - sensor_vel + rel_vel_local = gu.qd_inv_transform_by_quat(rel_vel_world, link_quat) + + vn_dot = rel_vel_local.dot(contact_normal_local) + v_t_local = rel_vel_local - contact_normal_local * vn_dot + force_local += contact_normal_local * (normal_damping * s * vn_dot) - shear_scalar * v_t_local + + rel_ang_world = links_state.cd_ang[contact_link, i_b] - links_state.cd_ang[sensor_link_idx, i_b] + omega_n = rel_ang_world.dot(contact_normal) + torque_local = probe_pos_local.cross(force_local) - contact_normal_local * (twist_scalar * omega_n) + else: + torque_local = probe_pos_local.cross(force_local) + + return force_local, torque_local + + @qd.kernel def _kernel_kinematic_taxel( probe_positions_local: qd.types.ndarray(), probe_sensor_idx: qd.types.ndarray(), probe_radii: qd.types.ndarray(), probe_radii_noise: qd.types.ndarray(), + probe_gains: qd.types.ndarray(), normal_stiffness: qd.types.ndarray(), normal_damping: qd.types.ndarray(), normal_exponent: qd.types.ndarray(), @@ -172,6 +236,7 @@ def _kernel_kinematic_taxel( rigid_global_info: 
array_class.RigidGlobalInfo, sdf_info: array_class.SDFInfo, eps: float, + measured_equals_gt: int, output_gt: qd.types.ndarray(), output_measured: qd.types.ndarray(), ): @@ -180,6 +245,20 @@ def _kernel_kinematic_taxel( for i_p, i_b in qd.ndrange(total_n_probes, n_batches): i_s = probe_sensor_idx[i_p] + probe_idx_in_sensor = i_p - sensor_probe_start[i_s] + cache_start = sensor_cache_start[i_s] + n_probes = n_probes_per_sensor[i_s] + force_start = cache_start + probe_idx_in_sensor * 3 + torque_start = cache_start + n_probes * 3 + probe_idx_in_sensor * 3 + + # Inactive filler probe (probe_radius == 0): reads zero force/torque, no contact query. + if probe_radii[i_p] <= gs.qd_float(0.0): + for j in qd.static(range(3)): + output_gt[force_start + j, i_b] = gs.qd_float(0.0) + output_gt[torque_start + j, i_b] = gs.qd_float(0.0) + output_measured[force_start + j, i_b] = gs.qd_float(0.0) + output_measured[torque_start + j, i_b] = gs.qd_float(0.0) + continue probe_pos_local = qd.Vector( [probe_positions_local[i_p, 0], probe_positions_local[i_p, 1], probe_positions_local[i_p, 2]] @@ -220,77 +299,47 @@ def _kernel_kinematic_taxel( eps, ) - force_local_gt = qd.Vector.zero(gs.qd_float, 3) - torque_local_gt = qd.Vector.zero(gs.qd_float, 3) - if max_penetration_gt > 0: - contact_normal_local = gu.qd_inv_transform_by_quat(contact_normal_gt, link_quat) - s = qd.pow(max_penetration_gt, normal_exponent[i_s]) - force_local_gt = contact_normal_local * (normal_stiffness[i_s] * s) - - if contact_link_gt >= 0: - contact_vel = links_state.cd_vel[contact_link_gt, i_b] + links_state.cd_ang[contact_link_gt, i_b].cross( - probe_pos - links_state.root_COM[contact_link_gt, i_b] - ) - sensor_vel = links_state.cd_vel[sensor_link_idx, i_b] + links_state.cd_ang[sensor_link_idx, i_b].cross( - probe_pos - links_state.root_COM[sensor_link_idx, i_b] - ) - rel_vel_world = contact_vel - sensor_vel - rel_vel_local = gu.qd_inv_transform_by_quat(rel_vel_world, link_quat) - - vn_dot = 
rel_vel_local.dot(contact_normal_local) - v_t_local = rel_vel_local - contact_normal_local * vn_dot - force_local_gt += ( - contact_normal_local * (normal_damping[i_s] * s * vn_dot) - shear_scalar[i_s] * v_t_local - ) - - rel_ang_world = links_state.cd_ang[contact_link_gt, i_b] - links_state.cd_ang[sensor_link_idx, i_b] - omega_n = rel_ang_world.dot(contact_normal_gt) - torque_local_gt = probe_pos_local.cross(force_local_gt) - contact_normal_local * ( - twist_scalar[i_s] * omega_n - ) - else: - torque_local_gt = probe_pos_local.cross(force_local_gt) - - force_local_m = qd.Vector.zero(gs.qd_float, 3) - torque_local_m = qd.Vector.zero(gs.qd_float, 3) - if not use_noised_radius: - for j in qd.static(range(3)): - force_local_m[j] = force_local_gt[j] - torque_local_m[j] = torque_local_gt[j] - elif max_penetration_m > 0: - contact_normal_local = gu.qd_inv_transform_by_quat(contact_normal_m, link_quat) - s = qd.pow(max_penetration_m, normal_exponent[i_s]) - force_local_m = contact_normal_local * (normal_stiffness[i_s] * s) - - if contact_link_m >= 0: - contact_vel = links_state.cd_vel[contact_link_m, i_b] + links_state.cd_ang[contact_link_m, i_b].cross( - probe_pos - links_state.root_COM[contact_link_m, i_b] - ) - sensor_vel = links_state.cd_vel[sensor_link_idx, i_b] + links_state.cd_ang[sensor_link_idx, i_b].cross( - probe_pos - links_state.root_COM[sensor_link_idx, i_b] - ) - rel_vel_world = contact_vel - sensor_vel - rel_vel_local = gu.qd_inv_transform_by_quat(rel_vel_world, link_quat) - - vn_dot = rel_vel_local.dot(contact_normal_local) - v_t_local = rel_vel_local - contact_normal_local * vn_dot - force_local_m += ( - contact_normal_local * (normal_damping[i_s] * s * vn_dot) - shear_scalar[i_s] * v_t_local - ) - - rel_ang_world = links_state.cd_ang[contact_link_m, i_b] - links_state.cd_ang[sensor_link_idx, i_b] - omega_n = rel_ang_world.dot(contact_normal_m) - torque_local_m = probe_pos_local.cross(force_local_m) - contact_normal_local * ( - twist_scalar[i_s] * 
omega_n - ) - else: - torque_local_m = probe_pos_local.cross(force_local_m) + force_local_gt, torque_local_gt = _func_kinematic_spring_damper( + i_b, + max_penetration_gt, + contact_link_gt, + contact_normal_gt, + sensor_link_idx, + probe_pos, + probe_pos_local, + link_quat, + normal_stiffness[i_s], + normal_damping[i_s], + normal_exponent[i_s], + shear_scalar[i_s], + twist_scalar[i_s], + links_state, + ) + + force_local_m = force_local_gt + torque_local_m = torque_local_gt + if measured_equals_gt == 0: + # The measured branch differs from GT: either some probe has a noised sensing radius or a non-unit + # per-(env, probe) gain. Gain scales the measured penetration only; force / torque then scale as + # ``gain ** normal_exponent`` since they derive from ``s = max_penetration_m ** normal_exponent``. + max_penetration_m = max_penetration_m * probe_gains[i_b, i_p] + force_local_m, torque_local_m = _func_kinematic_spring_damper( + i_b, + max_penetration_m, + contact_link_m, + contact_normal_m, + sensor_link_idx, + probe_pos, + probe_pos_local, + link_quat, + normal_stiffness[i_s], + normal_damping[i_s], + normal_exponent[i_s], + shear_scalar[i_s], + twist_scalar[i_s], + links_state, + ) - probe_idx_in_sensor = i_p - sensor_probe_start[i_s] - cache_start = sensor_cache_start[i_s] - n_probes = n_probes_per_sensor[i_s] - force_start = cache_start + probe_idx_in_sensor * 3 - torque_start = cache_start + n_probes * 3 + probe_idx_in_sensor * 3 for j in qd.static(range(3)): output_gt[force_start + j, i_b] = force_local_gt[j] output_gt[torque_start + j, i_b] = torque_local_gt[j] @@ -304,6 +353,7 @@ def _kernel_contact_depth_probe( probe_sensor_idx: qd.types.ndarray(), probe_radii: qd.types.ndarray(), probe_radii_noise: qd.types.ndarray(), + probe_gains: qd.types.ndarray(), links_idx: qd.types.ndarray(), sensor_cache_start: qd.types.ndarray(), sensor_probe_start: qd.types.ndarray(), @@ -321,6 +371,13 @@ def _kernel_contact_depth_probe( for i_p, i_b in qd.ndrange(total_n_probes, 
n_batches): i_s = probe_sensor_idx[i_p] + # Inactive filler probe (probe_radius == 0): reads zero depth (which contact-probe interprets as no contact). + if probe_radii[i_p] <= gs.qd_float(0.0): + cache_idx = sensor_cache_start[i_s] + i_p - sensor_probe_start[i_s] + output_gt[cache_idx, i_b] = gs.qd_float(0.0) + output_measured[cache_idx, i_b] = gs.qd_float(0.0) + continue + probe_pos_local = qd.Vector( [probe_positions_local[i_p, 0], probe_positions_local[i_p, 1], probe_positions_local[i_p, 2]] ) @@ -348,47 +405,31 @@ def _kernel_contact_depth_probe( collider_state, sdf_info, ) + # Per-(env, probe) gain on the measured-branch depth only. + max_penetration_m = max_penetration_m * probe_gains[i_b, i_p] cache_idx = sensor_cache_start[i_s] + i_p - sensor_probe_start[i_s] output_gt[cache_idx, i_b] = max_penetration_gt output_measured[cache_idx, i_b] = max_penetration_m class KinematicTactileSensorMixin(ProbeSensorMixin[ProbesWithNormalSensorSharedMetadataT]): - def __init__(self, sensor_options: "SensorOptions", sensor_idx: int, sensor_manager: "SensorManager"): - super().__init__(sensor_options, sensor_idx, sensor_manager) - self._debug_objects: list = [] - def build(self): super().build() self._shared_metadata.solver.collider.activate_sdf() - def _draw_debug_probes(self, context: "RasterizerContext", get_is_contact: Callable[[object], object]): - for obj in self._debug_objects: - context.clear_debug_object(obj) - self._debug_objects.clear() - - envs_idx, n_debug_envs, _, probe_world = self._compute_probes_world_pos(context) - data = self.read_ground_truth(envs_idx) - is_contact = np.asarray(tensor_to_array(get_is_contact(data)), dtype=bool).reshape(-1) - probe_global_idx = int(self._shared_metadata.sensor_probe_start[self._idx]) - probe_radius = float(self._shared_metadata.probe_radii[probe_global_idx]) - for is_contact_state in (False, True): - (probes_idx,) = np.nonzero(is_contact == is_contact_state) - if probes_idx.size > 0: - spheres_obj = 
context.draw_debug_spheres( - poss=probe_world[probes_idx], - radius=probe_radius, - color=self._options.debug_contact_color if is_contact_state else self._options.debug_probe_color, - ) - self._debug_objects.append(spheres_obj) - @dataclass -class ContactDepthProbeMetadata(ProbeSensorMetadataMixin, RigidSensorMetadataMixin, SimpleSensorMetadata): +class ContactDepthProbeMetadata( + ViscoelasticHysteresisMetadataMixin, + ProbeSensorMetadataMixin, + RigidSensorMetadataMixin, + SimpleSensorMetadata, +): pass class ContactDepthProbeSensor( + ViscoelasticHysteresisMixin[ContactDepthProbeMetadata], KinematicTactileSensorMixin[ContactDepthProbeMetadata], RigidSensorMixin[ContactDepthProbeMetadata], SimpleSensor[ContactDepthProbeOptions, ContactDepthProbeMetadata, tuple], @@ -396,7 +437,7 @@ class ContactDepthProbeSensor( """Returns contact depth in meters per probe.""" def _get_return_format(self) -> tuple[int, ...]: - return (self._n_probes,) + return self._probe_layout_shape @classmethod def _get_cache_dtype(cls) -> torch.dtype: @@ -411,19 +452,15 @@ def _update_current_timestep_data( measured_data_timeline: "TensorRingBuffer", ): solver = shared_metadata.solver - - current_ground_truth_data_T.zero_() - measured = measured_data_timeline.at(0, copy=False) - measured.zero_() - if shared_metadata.measured_scratch_T.shape != current_ground_truth_data_T.shape: - shared_metadata.measured_scratch_T = torch.empty_like(current_ground_truth_data_T) - measured_cols_b = shared_metadata.measured_scratch_T - + measured, measured_cols_b = get_measured_bufs( + shared_metadata, current_ground_truth_data_T, measured_data_timeline + ) _kernel_contact_depth_probe( shared_metadata.probe_positions, shared_metadata.probe_sensor_idx, shared_metadata.probe_radii, shared_metadata.probe_radii_noise, + shared_metadata.probe_gains, shared_metadata.links_idx, shared_metadata.sensor_cache_start, shared_metadata.sensor_probe_start, @@ -440,24 +477,48 @@ def _update_current_timestep_data( 
measured.copy_(measured_cols_b.T) def _draw_debug(self, context: "RasterizerContext"): - self._draw_debug_probes(context, lambda depth: depth >= gs.EPS) + def mask(envs_idx): + depth = self.read_ground_truth(envs_idx) + if self._options.history_length > 0: + depth = depth.select(1 if self._manager._sim.n_envs > 0 else 0, -1) + return depth >= gs.EPS + + self._draw_debug_probes(context, self._tactile_color_groups_fn(mask)) @dataclass class ContactProbeMetadata(ContactDepthProbeMetadata): contact_threshold: torch.Tensor = make_tensor_field((0,)) - # Per-probe threshold scattered into intermediate-cache layout, computed lazily on first `_post_process`. + release_threshold: torch.Tensor = make_tensor_field((0,)) + # Per-probe thresholds scattered into intermediate-cache layout, computed lazily on first `_post_process`. threshold_row: torch.Tensor = make_tensor_field((0,)) + release_threshold_row: torch.Tensor = make_tensor_field((0,)) class ContactProbeSensor(ContactDepthProbeSensor, SimpleSensor[ContactProbeOptions, ContactProbeMetadata, tuple]): - """Returns boolean contact per probe (depth > threshold). Shares the depth-probe kernel.""" + """ + Returns boolean contact per probe with optional Schmitt-trigger hysteresis. Shares the depth-probe kernel. + + The contact bit latches on when depth exceeds ``contact_threshold`` and releases when depth drops to or below + ``release_threshold``. When ``release_threshold`` is left unset (the default; it then falls back to + ``contact_threshold``), the latch is degenerate and behavior matches a stateless threshold. Latch state is read + from the per-branch return-space ring, so GT and measured branches latch independently and reset cleanly with + the env (the manager zeros the ring on reset). 
+ """ def build(self): super().build() self._shared_metadata.contact_threshold = concat_with_tensor( self._shared_metadata.contact_threshold, self._options.contact_threshold, expand=(1,) ) + release = ( + self._options.contact_threshold + if self._options.release_threshold is None + else self._options.release_threshold + ) + self._shared_metadata.release_threshold = concat_with_tensor( + self._shared_metadata.release_threshold, release, expand=(1,) + ) @classmethod def _get_cache_dtype(cls) -> torch.dtype: @@ -483,15 +544,26 @@ def _post_process( i_p = torch.arange(shared_metadata.total_n_probes, device=gs.device, dtype=gs.tc_int) i_s = shared_metadata.probe_sensor_idx cache_idx = shared_metadata.sensor_cache_start[i_s] + i_p - shared_metadata.sensor_probe_start[i_s] - row = torch.zeros((tensor.shape[1],), dtype=tensor.dtype, device=gs.device) - row.scatter_( - 0, cache_idx.to(dtype=torch.int64), shared_metadata.contact_threshold[i_s].to(dtype=tensor.dtype) - ) - shared_metadata.threshold_row = row - return tensor > shared_metadata.threshold_row.unsqueeze(0) + cache_idx_64 = cache_idx.to(dtype=torch.int64) + enter_row = torch.zeros((tensor.shape[1],), dtype=tensor.dtype, device=gs.device) + enter_row.scatter_(0, cache_idx_64, shared_metadata.contact_threshold[i_s].to(dtype=tensor.dtype)) + release_row = torch.zeros((tensor.shape[1],), dtype=tensor.dtype, device=gs.device) + release_row.scatter_(0, cache_idx_64, shared_metadata.release_threshold[i_s].to(dtype=tensor.dtype)) + shared_metadata.threshold_row = enter_row + shared_metadata.release_threshold_row = release_row + above_enter = tensor > shared_metadata.threshold_row.unsqueeze(0) + above_release = tensor > shared_metadata.release_threshold_row.unsqueeze(0) + prev_state = timeline.at(0, copy=False) + return above_enter | (prev_state & above_release) def _draw_debug(self, context: "RasterizerContext"): - self._draw_debug_probes(context, lambda data: data) + def mask(envs_idx): + contact = 
self.read_ground_truth(envs_idx) + if self._options.history_length > 0: + contact = contact.select(1 if self._manager._sim.n_envs > 0 else 0, -1) + return contact + + self._draw_debug_probes(context, self._tactile_color_groups_fn(mask)) class KinematicTaxelData(NamedTuple): @@ -508,15 +580,188 @@ class KinematicTaxelData(NamedTuple): @dataclass -class KinematicTaxelMetadata(ProbesWithNormalSensorMetadataMixin, RigidSensorMetadataMixin, SimpleSensorMetadata): +class KinematicTaxelMetadata( + ViscoelasticHysteresisMetadataMixin, + GridFFTConvMetadataMixin, + ProbesWithNormalSensorMetadataMixin, + RigidSensorMetadataMixin, + SimpleSensorMetadata, +): normal_stiffness: torch.Tensor = make_tensor_field((0,)) normal_damping: torch.Tensor = make_tensor_field((0,)) normal_exponent: torch.Tensor = make_tensor_field((0,)) shear_scalar: torch.Tensor = make_tensor_field((0,)) twist_scalar: torch.Tensor = make_tensor_field((0,)) + # Spatial crosstalk reuses the shared ``GridFFTConvMetadataMixin`` state. ``grid_fft_meta`` tuples for this + # sensor are ``(sensor_idx, g_ny, g_nx, probe_start, cache_start, sigma, strength, spacing_u, spacing_v)``; + # the kernel is a combined ``(1 - strength) * identity + strength * Gaussian / sum(Gaussian)`` blur and the + # per-step buffer has 6 channels (force xyz + torque xyz). + + +@torch.jit.script +def _precompute_crosstalk_kernel_fft( + sigma: float, + strength: float, + grid_spacing: tuple[float, float], + fft_n: tuple[int, int], + device: torch.device, + dtype: torch.dtype, +) -> torch.Tensor: + """Combined ``(1 - strength) * identity + strength * Gaussian/sum(Gaussian)`` kernel, real-FFT'd. + + Kernel is centered on the FFT origin via ``ifftshift`` so circular convolution is equivalent to convolution with + a kernel anchored at the taxel itself. 
The Gaussian is L1-normalized so a uniform field passes through unchanged + (DC bin = 1); the identity-blend keeps the response peaked at the source taxel and the rest leaked into the + Gaussian skirt. The output is a complex ``(fft_n[0], fft_n[1] // 2 + 1)`` half-spectrum ready to multiply against + ``rfft2(field)``. + """ + i = torch.arange(fft_n[0], dtype=dtype, device=device) + j = torch.arange(fft_n[1], dtype=dtype, device=device) + yy, xx = torch.meshgrid((i - fft_n[0] // 2) * grid_spacing[0], (j - fft_n[1] // 2) * grid_spacing[1], indexing="ij") + sigma_t = torch.tensor(sigma, dtype=dtype, device=device) + g = torch.exp(-(xx * xx + yy * yy) / (2.0 * sigma_t * sigma_t)) + g = g / g.sum() + # Identity in centered layout: 1 at the central cell. ``ifftshift`` then aligns it with FFT index 0. + identity = torch.zeros_like(g) + identity[fft_n[0] // 2, fft_n[1] // 2] = 1.0 + combined = (1.0 - strength) * identity + strength * g + combined = torch.fft.ifftshift(combined, dim=(-2, -1)) + return torch.fft.rfft2(combined) + + +def _crosstalk_kernel_builder(meta_entry: tuple, fft_n: tuple[int, int]) -> torch.Tensor: + """``register_grid_fft_sensor`` kernel builder for spatial crosstalk: 1 plane (identity-blended Gaussian). + + ``meta_entry`` is ``(sensor_idx, g_ny, g_nx, probe_start, cache_start, sigma, strength, spacing_u, spacing_v)``. + The crosstalk kernel's axis 0 spans ny / tangent_v and axis 1 spans nx / tangent_u, so spacing is passed as + ``(spacing_v, spacing_u)``. 
+ """ + _, _, _, _, _, sigma, strength, spacing_u, spacing_v = meta_entry + k = _precompute_crosstalk_kernel_fft(sigma, strength, (spacing_v, spacing_u), fft_n, gs.device, gs.tc_float) + return k.unsqueeze(0) # (1, fft_ny, fft_nx) -- single kernel plane + + +def _kinematic_taxel_grid_fft_crosstalk( + grid_fft_meta: list[tuple], + grid_fft_kernels_stacked: torch.Tensor, + cache_data: torch.Tensor, + grid_fft_buffer: torch.Tensor, + probe_radii: torch.Tensor, +) -> None: + """ + Apply per-sensor 2D-FFT spatial crosstalk to all 6 channels (force xyz + torque xyz) of every registered + grid-crosstalk KinematicTaxel sensor. Mutates ``cache_data`` in place. + + ``cache_data`` is the per-class intermediate cache in ``(B, total_cols)`` layout. Each KinematicTaxel sensor's + slice spans ``2 * n_probes * 3`` columns: 3 force xyz cols per probe, then 3 torque xyz cols per probe. + """ + if not grid_fft_meta: + return + B = cache_data.shape[0] + fft_ny, fft_nx = grid_fft_buffer.shape[-2], grid_fft_buffer.shape[-1] + + # 1) Fill the active region of the buffer. The zero-padding region is never written here and stays zero from + # allocation (``register_grid_fft_sensor`` allocates with ``torch.zeros``); the active ``[:g_ny, :g_nx]`` region + # is fully overwritten every step, so no per-step ``zero_()`` is needed. + for grid_pos, (_, g_ny, g_nx, _, cache_start, _, _, _, _) in enumerate(grid_fft_meta): + n_probes = g_ny * g_nx + # Layout in cache: force.xyz for all probes, then torque.xyz for all probes. Each block is ``n_probes * 3`` + # cols, with probe-major ordering matching probe flat index ``iy * nx + ix``. + force_block = cache_data[:, cache_start : cache_start + n_probes * 3] + torque_block = cache_data[:, cache_start + n_probes * 3 : cache_start + 2 * n_probes * 3] + # Reshape (B, ny, nx, 3) -> (B, 3, ny, nx); the slice-assignment accepts the non-contiguous permuted view. 
+ grid_fft_buffer[:, grid_pos, 0:3, :g_ny, :g_nx] = force_block.view(B, g_ny, g_nx, 3).permute(0, 3, 1, 2) + grid_fft_buffer[:, grid_pos, 3:6, :g_ny, :g_nx] = torque_block.view(B, g_ny, g_nx, 3).permute(0, 3, 1, 2) + + # 2) Batched real FFT over the last two dims; kernel is per-sensor, broadcast over B and 6 channels. Inputs are + # real so ``rfft2`` (half spectrum) is ~2x cheaper than the full complex ``fft2``. + H_fft = torch.fft.rfft2(grid_fft_buffer) # (B, n_grid_xt, 6, fft_ny, fft_nx // 2 + 1) complex + # Stacked kernels: (n_grid_xt, 1, fft_ny, fft_nx // 2 + 1) -> (1, n_grid_xt, 1, ...) for broadcast. + K = grid_fft_kernels_stacked.unsqueeze(0) + smeared = torch.fft.irfft2(H_fft * K, s=(fft_ny, fft_nx)) # (B, n_grid_xt, 6, fft_ny, fft_nx) + + # 3) Slice each sensor back to its (g_ny, g_nx) grid and write into the cache. + for grid_pos, (_, g_ny, g_nx, probe_start, cache_start, _, _, _, _) in enumerate(grid_fft_meta): + n_probes = g_ny * g_nx + # Zero inactive filler probes (probe_radius == 0): the blur smears neighbour force/torque into their cells. + active = (probe_radii[probe_start : probe_start + n_probes] > 0.0).to(smeared.dtype).view(1, 1, g_ny, g_nx) + force_smeared = smeared[:, grid_pos, 0:3, :g_ny, :g_nx] * active # (B, 3, ny, nx) + torque_smeared = smeared[:, grid_pos, 3:6, :g_ny, :g_nx] * active + # Inverse of the permute used in step 1: (B, 3, ny, nx) -> (B, ny, nx, 3) -> flat (B, ny*nx*3). 
+ cache_data[:, cache_start : cache_start + n_probes * 3] = force_smeared.permute(0, 2, 3, 1).reshape( + B, n_probes * 3 + ) + cache_data[:, cache_start + n_probes * 3 : cache_start + 2 * n_probes * 3] = torque_smeared.permute( + 0, 2, 3, 1 + ).reshape(B, n_probes * 3) + + +CrosstalkSharedMetadataT = TypeVar("CrosstalkSharedMetadataT", bound=KinematicTaxelMetadata) + + +class KinematicTaxelCrosstalkMixin(Generic[CrosstalkSharedMetadataT]): + """ + Adds FFT-based spatial crosstalk (Gaussian blur, optionally mixed with identity) to KinematicTaxel on the + measured branch. Operates on all 6 channels (force xyz + torque xyz) of every grid-shaped sensor with + ``crosstalk_strength > 0``. Must come BEFORE ``SimpleSensor`` and AFTER ``ViscoelasticHysteresisMixin`` in MRO + so the data flow is: kernel output -> crosstalk -> hysteresis -> hardware imperfections. + """ + + _shared_metadata: CrosstalkSharedMetadataT + + def _register_crosstalk(self): + """Register this sensor for FFT crosstalk via the shared ``register_grid_fft_sensor`` scaffolding. + + Called only when this sensor has a validated grid layout AND ``crosstalk_strength > 0``. The FFT size is + ``(ny, nx)``, padded for the Gaussian tail so circular wrap stays below tolerance. + """ + sm = self._shared_metadata + sensor_idx = sm.n_probes_per_sensor.shape[0] - 1 # this sensor was just registered + probe_start = int(sm.sensor_probe_start[sensor_idx].item()) + cache_start = int(sm.sensor_cache_start[sensor_idx].item()) + g_ny, g_nx = int(self._probe_layout_shape[0]), int(self._probe_layout_shape[1]) + sigma = float(self._options.crosstalk_sigma) + strength = float(self._options.crosstalk_strength) + spacing_u = float(self._grid_spacing[0].item()) + spacing_v = float(self._grid_spacing[1].item()) + # FFT size per axis: grid extent + the 3-sigma Gaussian tail on each side, rounded up to a power of 2. + # Truncating at 3 sigma leaves circular wraparound below ~0.3% (sub-tolerance for a well-localized blur). 
+ fft_ny = next_pow2(g_ny + 2 * int(math.ceil(3.0 * sigma / spacing_v))) + fft_nx = next_pow2(g_nx + 2 * int(math.ceil(3.0 * sigma / spacing_u))) + register_grid_fft_sensor( + sm, + meta_entry=(sensor_idx, g_ny, g_nx, probe_start, cache_start, sigma, strength, spacing_u, spacing_v), + this_fft_n=(fft_ny, fft_nx), + kernel_builder=_crosstalk_kernel_builder, + n_buffer_channels=6, + batch_size=self._manager._sim._B, + ) + + @classmethod + def _apply_transform( + cls, + shared_metadata: CrosstalkSharedMetadataT, + data: torch.Tensor, + timeline: "TensorRingBuffer", + *, + is_measured: bool, + ): + super()._apply_transform(shared_metadata, data, timeline, is_measured=is_measured) + if not is_measured or not shared_metadata.any_grid_fft: + return + _kinematic_taxel_grid_fft_crosstalk( + shared_metadata.grid_fft_meta, + shared_metadata.grid_fft_kernels_stacked, + data, + shared_metadata.grid_fft_buffer, + shared_metadata.probe_radii, + ) + class KinematicTaxelSensor( + ViscoelasticHysteresisMixin[KinematicTaxelMetadata], + KinematicTaxelCrosstalkMixin[KinematicTaxelMetadata], KinematicTactileSensorMixin[KinematicTaxelMetadata], ProbesWithNormalSensorMixin[KinematicTaxelMetadata], RigidSensorMixin[KinematicTaxelMetadata], @@ -524,8 +769,32 @@ class KinematicTaxelSensor( ): """Kinematic taxels: spring-damper force and torque per probe from contact geometry and relative motion.""" + # Two channel groups: force xyz followed by torque xyz (probe-major within each group). See + # ``ProbeSensorMixin._taxel_channel_groups`` for how this drives dead-taxel cache-col -> probe mapping. + _taxel_channel_groups: int = 2 + def __init__(self, sensor_options: KinematicTaxelOptions, sensor_idx: int, sensor_manager: "SensorManager"): super().__init__(sensor_options, sensor_idx, sensor_manager) + # FFT-grid eligibility: validates that a 2D layout has uniform spacing/normals/orthogonal tangents. + # Flat pos/normals are already populated by ProbeSensorMixin / ProbesWithNormalSensorMixin. 
+ is_grid = len(self._probe_layout_shape) == 2 + _, _, self._use_grid_fft, grid_normal, grid_tangent_u, grid_tangent_v, grid_spacing = ( + normalize_grid_probe_layout( + np.asarray(sensor_options.probe_local_pos, dtype=gs.np_float), + np.asarray(sensor_options.probe_local_normal, dtype=gs.np_float), + is_grid, + ) + ) + self._grid_normal = torch.tensor(grid_normal, dtype=gs.tc_float, device=gs.device) + self._grid_tangent_u = torch.tensor(grid_tangent_u, dtype=gs.tc_float, device=gs.device) + self._grid_tangent_v = torch.tensor(grid_tangent_v, dtype=gs.tc_float, device=gs.device) + self._grid_spacing = torch.tensor(grid_spacing, dtype=gs.tc_float, device=gs.device) + + if self._options.crosstalk_strength > 0.0 and not self._use_grid_fft: + gs.raise_exception( + "KinematicTaxel crosstalk requires a validated grid layout (probe_local_pos shape (ny, nx, 3) with " + "uniform spacing, uniform normals, and orthogonal tangents)." + ) def build(self): super().build() @@ -546,8 +815,12 @@ def build(self): self._shared_metadata.twist_scalar, float(self._options.twist_scalar), expand=(1,) ) + if self._options.crosstalk_strength > 0.0: + self._register_crosstalk() + def _get_return_format(self) -> tuple[tuple[int, ...], ...]: - return (self._n_probes, 3), (self._n_probes, 3) + shape = (*self._probe_layout_shape, 3) + return shape, shape @classmethod def _get_cache_dtype(cls) -> torch.dtype: @@ -562,19 +835,20 @@ def _update_current_timestep_data( measured_data_timeline: "TensorRingBuffer", ): solver = shared_metadata.solver - - current_ground_truth_data_T.zero_() - measured = measured_data_timeline.at(0, copy=False) - measured.zero_() - if shared_metadata.measured_scratch_T.shape != current_ground_truth_data_T.shape: - shared_metadata.measured_scratch_T = torch.empty_like(current_ground_truth_data_T) - measured_cols_b = shared_metadata.measured_scratch_T - + measured, measured_cols_b = get_measured_bufs( + shared_metadata, current_ground_truth_data_T, measured_data_timeline + 
) + # The measured branch is provably identical to GT (and the kernel can skip recomputing it) when no probe + # has a noised sensing radius and no probe has a non-unit measured-branch gain. + measured_equals_gt = int( + not shared_metadata.has_any_probe_radius_noise and not shared_metadata.has_any_probe_gain + ) _kernel_kinematic_taxel( shared_metadata.probe_positions, shared_metadata.probe_sensor_idx, shared_metadata.probe_radii, shared_metadata.probe_radii_noise, + shared_metadata.probe_gains, shared_metadata.normal_stiffness, shared_metadata.normal_damping, shared_metadata.normal_exponent, @@ -592,6 +866,7 @@ def _update_current_timestep_data( solver._rigid_global_info, solver.collider._sdf._sdf_info, gs.EPS, + measured_equals_gt, current_ground_truth_data_T, measured_cols_b, ) @@ -600,4 +875,10 @@ def _update_current_timestep_data( measured.copy_(measured_cols_b.T) def _draw_debug(self, context: "RasterizerContext"): - self._draw_debug_probes(context, lambda data: torch.linalg.norm(data.force, dim=-1) >= gs.EPS) + def mask(envs_idx): + force = self.read_ground_truth(envs_idx).force + if self._options.history_length > 0: + force = force.select(1 if self._manager._sim.n_envs > 0 else 0, -1) + return torch.linalg.norm(force, dim=-1) >= gs.EPS + + self._draw_debug_probes(context, self._tactile_color_groups_fn(mask)) diff --git a/genesis/engine/sensors/point_cloud_tactile.py b/genesis/engine/sensors/point_cloud_tactile.py index 2037c09f65..d7f66a51f5 100644 --- a/genesis/engine/sensors/point_cloud_tactile.py +++ b/genesis/engine/sensors/point_cloud_tactile.py @@ -21,6 +21,15 @@ ProbesWithNormalSensorMetadataMixin, ProbesWithNormalSensorMixin, func_noised_probe_radius, + get_measured_bufs, +) +from .tactile_shared import ( + GridFFTConvMetadataMixin, + ViscoelasticHysteresisMetadataMixin, + ViscoelasticHysteresisMixin, + next_pow2, + normalize_grid_probe_layout, + register_grid_fft_sensor, ) if TYPE_CHECKING: @@ -411,6 +420,7 @@ def 
_kernel_point_cloud_proximity_taxel_bvh( pc_active_envs_mask: qd.types.ndarray(), probe_radii: qd.types.ndarray(), probe_radii_noise: qd.types.ndarray(), + probe_gains: qd.types.ndarray(), stiffness: qd.types.ndarray(), shear_coupling: qd.types.ndarray(), proximity_density_scale: qd.types.ndarray(), @@ -550,6 +560,14 @@ def _kernel_point_cloud_proximity_taxel_bvh( fv_m[j] = fv_gt[j] tau_w_m[j] = tau_w_gt[j] + # Per-(env, probe) gain on the measured-branch accumulated penetration. Force and torque computed from + # these accumulators downstream scale linearly with gain because they're proportional to ``sum_p``. + gain_m = probe_gains[i_b, i_p] + sum_p_m = sum_p_m * gain_m + for j in qd.static(range(3)): + fv_m[j] = fv_m[j] * gain_m + tau_w_m[j] = tau_w_m[j] * gain_m + taxel_signal_buf[i_p, i_b] = sum_p_m f_w_gt = qd.Vector.zero(gs.qd_float, 3) @@ -613,7 +631,6 @@ class PointCloudTactileSharedMetadata(ProbeSensorMetadataMixin, RigidSensorMetad class PointCloudTactileSensorMixin(ProbeSensorMixin[PointCloudTactileSensorMetadataMixinT]): def __init__(self, sensor_options: "SensorOptions", sensor_idx: int, sensor_manager: "SensorManager"): super().__init__(sensor_options, sensor_idx, sensor_manager) - self._debug_objects: list = [] self._probe_start_idx = -1 self._debug_pc_chunks: list[tuple[int, torch.Tensor, torch.Tensor]] | None = None @@ -664,38 +681,24 @@ def build(self): ) def _draw_debug_probes( - self, context: "RasterizerContext", get_magnitude_1d: Callable[[list[int] | None], np.ndarray] - ) -> None: - for obj in self._debug_objects: - context.clear_debug_object(obj) - self._debug_objects.clear() - - envs_idx, n_debug_envs, env_offsets, probe_world = self._compute_probes_world_pos(context) - - magnitude = get_magnitude_1d(envs_idx).reshape(-1) - for is_contact in (False, True): - (probes_idx,) = np.nonzero(magnitude >= gs.EPS if is_contact else magnitude < gs.EPS) - if probes_idx.size == 0: - continue - spheres_obj = context.draw_debug_spheres( - 
poss=probe_world[probes_idx], - radius=self._shared_metadata.probe_radii[self._probe_start_idx].item(), - color=self._options.debug_contact_color if is_contact else self._options.debug_probe_color, - ) - self._debug_objects.append(spheres_obj) + self, + context: "RasterizerContext", + color_groups_fn: Callable[[list[int] | None], list[tuple]] | None = None, + ) -> tuple[list[int] | None, int, np.ndarray | None]: + envs_idx, n_debug_envs, env_offsets = super()._draw_debug_probes(context, color_groups_fn) if self._debug_pc_chunks is None: - return + return envs_idx, n_debug_envs, env_offsets world_chunks: list[np.ndarray] = [] for link_idx, pos_local, active_envs_mask in self._debug_pc_chunks: - trk_link = self._shared_metadata.solver.links[link_idx] + track_link = self._shared_metadata.solver.links[link_idx] if envs_idx is not None: active_mask = tensor_to_array(active_envs_mask[:, envs_idx].T).astype(bool) if not active_mask.any(): continue - trk_pos = trk_link.get_pos(envs_idx)[:, None, :] - trk_quat = trk_link.get_quat(envs_idx)[:, None, :] - pc_world = gu.transform_by_trans_quat(pos_local[None, :, :], trk_pos, trk_quat) + track_pos = track_link.get_pos(envs_idx)[:, None, :] + track_quat = track_link.get_quat(envs_idx)[:, None, :] + pc_world = gu.transform_by_trans_quat(pos_local[None, :, :], track_pos, track_quat) pc_world = tensor_to_array(pc_world) + env_offsets[:, None, :] world_chunks.append(pc_world[active_mask]) else: @@ -703,18 +706,18 @@ def _draw_debug_probes( pos_active = pos_local[active_mask] if pos_active.numel() == 0: continue - trk_pos = trk_link.get_pos(envs_idx).reshape(3) - trk_quat = trk_link.get_quat(envs_idx).reshape(4) - world_chunks.append(tensor_to_array(gu.transform_by_trans_quat(pos_active, trk_pos, trk_quat))) - if not world_chunks: - return - pc_world = np.concatenate(world_chunks, axis=0) - pc_obj = context.draw_debug_spheres( - poss=pc_world, - radius=float(self._options.debug_point_cloud_radius), - 
color=self._options.debug_point_cloud_color, - ) - self._debug_objects.append(pc_obj) + track_pos = track_link.get_pos(envs_idx).reshape(3) + track_quat = track_link.get_quat(envs_idx).reshape(4) + world_chunks.append(tensor_to_array(gu.transform_by_trans_quat(pos_active, track_pos, track_quat))) + if world_chunks: + self._debug_objects.append( + context.draw_debug_spheres( + poss=np.concatenate(world_chunks, axis=0), + radius=float(self._options.debug_point_cloud_radius), + color=self._options.debug_point_cloud_color, + ) + ) + return envs_idx, n_debug_envs, env_offsets def _debug_probe_buffer_magnitudes(self, buffer: torch.Tensor, envs_idx: list[int] | None) -> np.ndarray: values = buffer[self._probe_start_idx : self._probe_start_idx + self._n_probes] @@ -731,7 +734,11 @@ class ProximityTaxelData(NamedTuple): @dataclass -class ProximityTaxelMetadata(PointCloudTactileSharedMetadata, ProbesWithNormalSensorMetadataMixin): +class ProximityTaxelMetadata( + ViscoelasticHysteresisMetadataMixin, + PointCloudTactileSharedMetadata, + ProbesWithNormalSensorMetadataMixin, +): stiffness: torch.Tensor = make_tensor_field((0,)) shear_coupling: torch.Tensor = make_tensor_field((0,)) proximity_density_scale: torch.Tensor = make_tensor_field((0, 0)) @@ -739,6 +746,7 @@ class ProximityTaxelMetadata(PointCloudTactileSharedMetadata, ProbesWithNormalSe class ProximityTaxelSensor( + ViscoelasticHysteresisMixin[ProximityTaxelMetadata], PointCloudTactileSensorMixin[ProximityTaxelMetadata], ProbesWithNormalSensorMixin[ProximityTaxelMetadata], RigidSensorMixin[ProximityTaxelMetadata], @@ -746,6 +754,9 @@ class ProximityTaxelSensor( ): """Spherical point-cloud taxels: per-taxel force and torque in link-local frame vs tracked meshes.""" + # Two channel groups: force xyz followed by torque xyz (probe-major within each group). 
+ _taxel_channel_groups: int = 2 + def build(self): super().build() self._shared_metadata.stiffness = concat_with_tensor( @@ -769,7 +780,8 @@ def build(self): ) def _get_return_format(self) -> tuple[tuple[int, ...], ...]: - return ((self._n_probes, 3), (self._n_probes, 3)) + shape = (*self._probe_layout_shape, 3) + return shape, shape @classmethod def _get_cache_dtype(cls) -> torch.dtype: @@ -789,13 +801,9 @@ def _update_current_timestep_data( measured_data_timeline: "TensorRingBuffer", ): solver = shared_metadata.solver - current_ground_truth_data_T.zero_() - measured = measured_data_timeline.at(0, copy=False) - measured.zero_() - if shared_metadata.measured_scratch_T.shape != current_ground_truth_data_T.shape: - shared_metadata.measured_scratch_T = torch.empty_like(current_ground_truth_data_T) - measured_cols_b = shared_metadata.measured_scratch_T - + measured, measured_cols_b = get_measured_bufs( + shared_metadata, current_ground_truth_data_T, measured_data_timeline + ) bvh = shared_metadata.pc_bvh _kernel_point_cloud_proximity_taxel_bvh( shared_metadata.probe_positions, @@ -820,6 +828,7 @@ def _update_current_timestep_data( shared_metadata.pc_active_envs_mask, shared_metadata.probe_radii, shared_metadata.probe_radii_noise, + shared_metadata.probe_gains, shared_metadata.stiffness, shared_metadata.shear_coupling, shared_metadata.proximity_density_scale, @@ -836,99 +845,13 @@ def _update_current_timestep_data( def _draw_debug(self, context: "RasterizerContext"): self._draw_debug_probes( context, - lambda envs_idx: self._debug_probe_buffer_magnitudes(self._shared_metadata.taxel_signal_buf, envs_idx), + self._tactile_color_groups_fn( + lambda envs_idx: self._debug_probe_buffer_magnitudes(self._shared_metadata.taxel_signal_buf, envs_idx) + >= gs.EPS, + ), ) -_GRID_TOL = 1.0e-5 - - -def _next_pow2(n: int) -> int: - """Smallest power of 2 >= n (1 if n==0).""" - if n <= 1: - return 1 - p = 1 - while p < n: - p *= 2 - return p - - -def _expand_probe_normals(normals: 
np.ndarray, n_probes: int, probe_shape: tuple[int, ...]) -> np.ndarray: - normals = np.asarray(normals, dtype=gs.np_float) - if normals.ndim == 1: - return np.broadcast_to(normals, (n_probes, 3)).copy() - if normals.shape == (*probe_shape, 3): - return normals.reshape(n_probes, 3).copy() - if normals.shape == (n_probes, 3): - return normals.copy() - gs.raise_exception( - "ElastomerTaxel probe_local_normal must be one normal or match probe_local_pos shape. " - f"Got normal shape {normals.shape} for probe shape {probe_shape}." - ) - - -def _normalize_elastomer_probe_layout( - probe_pos: np.ndarray, probe_normals: np.ndarray, is_grid: bool -) -> tuple[np.ndarray, np.ndarray, bool, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - probe_shape = probe_pos.shape[:-1] - flat = probe_pos.reshape(-1, 3) - normals = _expand_probe_normals(probe_normals, flat.shape[0], probe_shape) - - normal_norms = np.linalg.norm(normals, axis=1) - if np.any(normal_norms < gs.EPS): - gs.raise_exception("ElastomerTaxel probe_local_normal entries must be non-zero.") - normals = normals / normal_norms[:, None] - - use_grid_fft = False - grid_normal = np.zeros(3, dtype=gs.np_float) - tangent_u = np.zeros(3, dtype=gs.np_float) - tangent_v = np.zeros(3, dtype=gs.np_float) - grid_spacing = np.zeros(2, dtype=gs.np_float) - - if is_grid: - if len(probe_shape) != 2: - gs.raise_exception("ElastomerTaxel grid probe_local_pos must have shape (ny, nx, 3).") - ny, nx = int(probe_shape[0]), int(probe_shape[1]) - if nx >= 2 and ny >= 2: - grid = probe_pos.reshape(ny, nx, 3) - step_u = grid[0, 1] - grid[0, 0] - step_v = grid[1, 0] - grid[0, 0] - spacing_u = float(np.linalg.norm(step_u)) - spacing_v = float(np.linalg.norm(step_v)) - if spacing_u >= gs.EPS and spacing_v >= gs.EPS: - tangent_u_candidate = (step_u / spacing_u).astype(gs.np_float) - tangent_v_candidate = (step_v / spacing_v).astype(gs.np_float) - normal_candidate = normals[0].astype(gs.np_float, copy=False) - normals_are_uniform = 
bool(np.all(normals @ normal_candidate >= 1.0 - _GRID_TOL)) - axes_are_orthogonal = abs(float(tangent_u_candidate @ tangent_v_candidate)) <= _GRID_TOL - axes_in_plane = ( - abs(float(tangent_u_candidate @ normal_candidate)) <= _GRID_TOL - and abs(float(tangent_v_candidate @ normal_candidate)) <= _GRID_TOL - ) - expected = ( - grid[0, 0] - + np.arange(nx, dtype=gs.np_float)[None, :, None] * step_u[None, None, :] - + np.arange(ny, dtype=gs.np_float)[:, None, None] * step_v[None, None, :] - ) - is_regular = bool(np.max(np.linalg.norm(grid - expected, axis=-1)) <= _GRID_TOL) - use_grid_fft = normals_are_uniform and axes_are_orthogonal and axes_in_plane and is_regular - if use_grid_fft: - grid_normal = normal_candidate - tangent_u = tangent_u_candidate - tangent_v = tangent_v_candidate - grid_spacing = np.array((spacing_u, spacing_v), dtype=gs.np_float) - - return ( - flat.astype(gs.np_float, copy=False), - normals.astype(gs.np_float, copy=False), - use_grid_fft, - grid_normal.astype(gs.np_float, copy=False), - tangent_u.astype(gs.np_float, copy=False), - tangent_v.astype(gs.np_float, copy=False), - grid_spacing.astype(gs.np_float, copy=False), - ) - - @qd.func def _func_elastomer_min_sdf_over_active_geoms( i_b: int, @@ -1000,17 +923,18 @@ def _func_elastomer_update_surface_anchor( @qd.func def _func_elastomer_direct_dilate_contribution( source_pos: qd.types.vector(3), - source_normal: qd.types.vector(3), target_pos: qd.types.vector(3), target_normal: qd.types.vector(3), depth: float, lam: float, scale: float, + normal_exponent: float, ) -> qd.types.vector(3): - source_contact_pos = source_pos - source_normal * depth - diff = target_pos - source_contact_pos - planar_diff = _func_elastomer_tangent(diff, target_normal) - return diff * depth * qd.exp(-lam * planar_diff.dot(planar_diff)) * scale + # Tangential marker spreading is linear in penetration depth; the out-of-plane bulge follows a + # ``depth ** normal_exponent`` power law (mirrors the FFT path's H / 
H**normal_exponent channel split). + planar_diff = _func_elastomer_tangent(target_pos - source_pos, target_normal) + falloff = qd.exp(-lam * planar_diff.dot(planar_diff)) * scale + return (planar_diff * depth + target_normal * qd.pow(depth, normal_exponent)) * falloff @qd.func @@ -1054,19 +978,37 @@ def _collect_collision_geom_idx(solver, track_link_idx: np.ndarray) -> tuple[tor def _precompute_hydroshear_dilate_kernel_fft( lambda_d: float, grid_spacing: tuple[float, float], fft_n: tuple[int, int], device: torch.device, dtype: torch.dtype ) -> torch.Tensor: - i = torch.arange(fft_n[0], dtype=dtype, device=device) - j = torch.arange(fft_n[1], dtype=dtype, device=device) - xx, yy = torch.meshgrid((i - fft_n[0] // 2) * grid_spacing[0], (j - fft_n[1] // 2) * grid_spacing[1], indexing="ij") - g = torch.exp(torch.tensor(-lambda_d, dtype=dtype, device=device) * (xx * xx + yy * yy)) - k = torch.stack((xx * g, yy * g, g), dim=0) + """Real FFT of the 3-plane HydroShear dilation kernel ``(Ku, Kv, Kn)``. + + ``fft_n`` is ``(fft_ny, fft_nx)`` row-major: axis 0 spans the tangent_v direction, axis 1 the tangent_u + direction. ``grid_spacing`` is ``(spacing_u, spacing_v)``. The output is a complex + ``(3, fft_ny, fft_nx // 2 + 1)`` half-spectrum ready to multiply against ``rfft2(field)``. + """ + iv = torch.arange(fft_n[0], dtype=dtype, device=device) + iu = torch.arange(fft_n[1], dtype=dtype, device=device) + vv, uu = torch.meshgrid( + (iv - fft_n[0] // 2) * grid_spacing[1], (iu - fft_n[1] // 2) * grid_spacing[0], indexing="ij" + ) + g = torch.exp(torch.tensor(-lambda_d, dtype=dtype, device=device) * (uu * uu + vv * vv)) + k = torch.stack((uu * g, vv * g, g), dim=0) k = torch.fft.ifftshift(k, dim=(-2, -1)) - return torch.fft.fft2(k) + return torch.fft.rfft2(k) + + +def _dilate_kernel_builder(meta_entry: tuple, fft_n: tuple[int, int]) -> torch.Tensor: + """``register_grid_fft_sensor`` kernel builder for HydroShear dilation: 3 planes ``(Ku, Kv, Kn)``. 
+ + ``meta_entry`` is ``(sensor_idx, g_ny, g_nx, probe_start, cache_start, lambda_d, spacing_u, spacing_v)``. + """ + _, _, _, _, _, lambda_d, spacing_u, spacing_v = meta_entry + return _precompute_hydroshear_dilate_kernel_fft(lambda_d, (spacing_u, spacing_v), fft_n, gs.device, gs.tc_float) @qd.kernel(fastcache=True) def _kernel_elastomer_probe_depth( probe_positions_local: qd.types.ndarray(), probe_sensor_idx: qd.types.ndarray(), + probe_radii: qd.types.ndarray(), links_idx: qd.types.ndarray(), sensor_track_geom_start: qd.types.ndarray(), sensor_track_geom_n: qd.types.ndarray(), @@ -1085,6 +1027,10 @@ def _kernel_elastomer_probe_depth( n_batches = probe_depth_buf.shape[0] for i_b, i_p in qd.ndrange(n_batches, total_n_probes): + # Inactive filler probe (probe_radius == 0): no SDF query, contributes no dilation. + if probe_radii[i_p] <= gs.qd_float(0.0): + probe_depth_buf[i_b, i_p] = gs.qd_float(0.0) + continue i_s = probe_sensor_idx[i_p] sensor_link_idx = links_idx[i_s] link_pos = links_state.pos[sensor_link_idx, i_b] @@ -1113,11 +1059,13 @@ def _kernel_elastomer_dilate_accumulate( probe_positions_local: qd.types.ndarray(), probe_local_normal: qd.types.ndarray(), probe_sensor_idx: qd.types.ndarray(), + probe_radii: qd.types.ndarray(), sensor_cache_start: qd.types.ndarray(), sensor_probe_start: qd.types.ndarray(), n_probes_per_sensor: qd.types.ndarray(), lambda_d: qd.types.ndarray(), dilate_scale: qd.types.ndarray(), + normal_exponent: qd.types.ndarray(), probe_depth_buf: qd.types.ndarray(), output: qd.types.ndarray(), ): @@ -1138,8 +1086,15 @@ def _kernel_elastomer_dilate_accumulate( cache_start = sensor_cache_start[i_s] lam = lambda_d[i_s] scale = dilate_scale[i_s] + n_exp = normal_exponent[i_s] _i_p = i_p - probe_start + # Inactive filler probe (probe_radius == 0): reads zero, no dilation accumulated. 
+ if probe_radii[i_p] <= gs.qd_float(0.0): + for k in qd.static(range(3)): + output[cache_start + _i_p * 3 + k, i_b] = gs.qd_float(0.0) + continue + target_local = _func_vec3_at(probe_positions_local, i_p) target_normal = _func_vec3_at(probe_local_normal, i_p) @@ -1151,12 +1106,12 @@ def _kernel_elastomer_dilate_accumulate( continue contribution = _func_elastomer_direct_dilate_contribution( _func_vec3_at(probe_positions_local, j_p), - _func_vec3_at(probe_local_normal, j_p), target_local, target_normal, src_depth, lam, scale, + n_exp, ) for k in qd.static(range(3)): acc[k] = acc[k] + contribution[k] @@ -1330,6 +1285,7 @@ def _kernel_elastomer_shear_accumulate( probe_positions_local: qd.types.ndarray(), probe_local_normal: qd.types.ndarray(), probe_sensor_idx: qd.types.ndarray(), + probe_radii: qd.types.ndarray(), sensor_cache_start: qd.types.ndarray(), sensor_probe_start: qd.types.ndarray(), sensor_pc_start: qd.types.ndarray(), @@ -1347,8 +1303,8 @@ def _kernel_elastomer_shear_accumulate( that are flagged ``surface_initialized`` and sum Gaussian contributions into a register, then += the result into ``output``. No atomic_add (each (i_b, i_p) thread owns its output slot). - Must run after the surface-state kernel AND after the Patch-3 torch cleanup that invalidates - ``surface_initialized_buf`` for BVH-pruned points -- otherwise stale True flags from prior + Must run after the surface-state kernel AND after the post-kernel ``surface_initialized_buf &= candidate`` + cleanup that invalidates the flag for BVH-pruned points -- otherwise stale True flags from prior steps would corrupt this step's accumulation. """ total_n_probes = probe_positions_local.shape[0] @@ -1359,6 +1315,9 @@ def _kernel_elastomer_shear_accumulate( scale = shear_scale[i_s] if scale <= gs.qd_float(0.0): continue + # Inactive filler probe (probe_radius == 0): reads zero (dilate already wrote 0 to this output slot). 
+ if probe_radii[i_p] <= gs.qd_float(0.0): + continue lam = lambda_s[i_s] cache_start = sensor_cache_start[i_s] _i_p = i_p - sensor_probe_start[i_s] @@ -1409,11 +1368,13 @@ def _kernel_elastomer_shear_accumulate( def _elastomer_taxel_grid_fft_dilate( - fft_grid_meta: list[tuple[int, int, int, int, int, float, float, float]], - fft_grid_kernels_stacked: torch.Tensor, + grid_fft_meta: list[tuple], + grid_fft_kernels_stacked: torch.Tensor, probe_depth_buf: torch.Tensor, - fft_depth_buffer: torch.Tensor, + probe_radii: torch.Tensor, + grid_fft_buffer: torch.Tensor, dilate_scale: torch.Tensor, + normal_exponent: torch.Tensor, grid_normal: torch.Tensor, grid_tangent_u: torch.Tensor, grid_tangent_v: torch.Tensor, @@ -1423,59 +1384,73 @@ def _elastomer_taxel_grid_fft_dilate( """ Elastomer marker dilation via 2D FFT in the validated probe tangent basis. - All grid sensors share the global ``fft_max_n`` (= last two dims of ``fft_depth_buffer``); their - kernels are stacked into ``fft_grid_kernels_stacked`` of shape (n_grid, 3, fft_max_n[0], - fft_max_n[1]). The four heavy FFTs (fft of H, fft of H*H, ifft for Kx/Ky/Kn) thus run as - batched ops over the grid-sensor axis, dropping launches from 4·n_grid to 4. The H-fill and - write-back stages remain per-sensor (small Python loops over view/copy and per-sensor tangent - decomposition). + All grid sensors share the global ``grid_fft_max_n`` (= last two dims of ``grid_fft_buffer``); their + kernels are stacked into ``grid_fft_kernels_stacked`` of shape (n_grid, 3, fft_ny, fft_nx). The four heavy + FFTs (fft of H, fft of H**normal_exponent, ifft for Ku/Kv/Kn) thus run as batched ops over the grid-sensor + axis, dropping + launches from 4·n_grid to 4. The H-fill and write-back stages remain per-sensor (small Python loops over + view/copy and per-sensor tangent decomposition). 
Grid axes are ``(ny, nx)`` row-major throughout (matching + the probe flat index ``iy * nx + ix``), so no transpose is needed on either the fill or write-back side. """ - if not fft_grid_meta: + if not grid_fft_meta: return n_batches = probe_depth_buf.shape[0] - - # 1) Fill the (B, n_grid, fft_max_nx, fft_max_ny) depth buffer. Per-sensor view+copy only. - fft_depth_buffer.zero_() - for grid_pos, (_, g_nx, g_ny, probe_start, _, _, _, _) in enumerate(fft_grid_meta): - depth_slice = probe_depth_buf[:, probe_start : probe_start + g_nx * g_ny] - fft_depth_buffer[:, grid_pos, :g_nx, :g_ny].copy_(depth_slice.view(n_batches, g_ny, g_nx).transpose(1, 2)) - - # 2) Batched FFTs across (B, n_grid). Broadcast over B when multiplying by per-sensor kernels. - H_fft = torch.fft.fft2(fft_depth_buffer) - H2_fft = torch.fft.fft2(fft_depth_buffer * fft_depth_buffer) - Kx_all = fft_grid_kernels_stacked[:, 0] # (n_grid, fft_max_nx, fft_max_ny) complex - Ky_all = fft_grid_kernels_stacked[:, 1] - Kn_all = fft_grid_kernels_stacked[:, 2] - disp_u_all = torch.fft.ifft2(H_fft * Kx_all).real # (B, n_grid, fft_max_nx, fft_max_ny) - disp_v_all = torch.fft.ifft2(H_fft * Ky_all).real - disp_n_all = torch.fft.ifft2(H2_fft * Kn_all).real - - # 3) Per-sensor write-back: slice to (g_nx, g_ny), apply scale + tangent decomposition, copy + fft_ny, fft_nx = grid_fft_buffer.shape[-2], grid_fft_buffer.shape[-1] + + # 1) Fill the active region of the (B, n_grid, fft_ny, fft_nx) depth buffer. The zero-padding region is never + # written here and stays zero from allocation, so no per-step ``zero_()`` is needed. + for grid_pos, (_, g_ny, g_nx, probe_start, _, _, _, _) in enumerate(grid_fft_meta): + depth_slice = probe_depth_buf[:, probe_start : probe_start + g_ny * g_nx] + grid_fft_buffer[:, grid_pos, :g_ny, :g_nx].copy_(depth_slice.view(n_batches, g_ny, g_nx)) + + # 2) Batched real FFTs across (B, n_grid). Inputs are real so ``rfft2`` (half spectrum) is ~2x cheaper than the + # full complex ``fft2``. 
Kernels broadcast over B when multiplying. + H_fft = torch.fft.rfft2(grid_fft_buffer) + # The normal channel follows depth ** normal_exponent, so it convolves the per-grid powered depth field; + # the tangential (u, v) channels stay linear in depth and convolve the raw field H. + exps = normal_exponent[[meta[0] for meta in grid_fft_meta]].reshape(1, -1, 1, 1) + Hp_fft = torch.fft.rfft2(grid_fft_buffer.pow(exps)) + Ku_all = grid_fft_kernels_stacked[:, 0] # (n_grid, fft_ny, fft_nx // 2 + 1) complex + Kv_all = grid_fft_kernels_stacked[:, 1] + Kn_all = grid_fft_kernels_stacked[:, 2] + disp_u_all = torch.fft.irfft2(H_fft * Ku_all, s=(fft_ny, fft_nx)) # (B, n_grid, fft_ny, fft_nx) + disp_v_all = torch.fft.irfft2(H_fft * Kv_all, s=(fft_ny, fft_nx)) + disp_n_all = torch.fft.irfft2(Hp_fft * Kn_all, s=(fft_ny, fft_nx)) + + # 3) Per-sensor write-back: slice to (g_ny, g_nx), apply scale + tangent decomposition, copy # into the sensor's output range. Tangent vectors are per-sensor so can't trivially batch here. - for grid_pos, meta in enumerate(fft_grid_meta): - sensor_idx, g_nx, g_ny, _, cache_start, _, _, _ = meta + for grid_pos, meta in enumerate(grid_fft_meta): + sensor_idx, g_ny, g_nx, probe_start, cache_start, _, _, _ = meta scale_s = dilate_scale[sensor_idx] - disp_u = disp_u_all[:, grid_pos, :g_nx, :g_ny] * scale_s - disp_v = disp_v_all[:, grid_pos, :g_nx, :g_ny] * scale_s - disp_n = disp_n_all[:, grid_pos, :g_nx, :g_ny] * scale_s - # Cache order is probe flat index iy*nx+ix; (g_nx, g_ny) transpose(1, 2).reshape gives (g_ny, g_nx) -> iy*nx+ix. 
- disp_u_flat = disp_u.transpose(1, 2).reshape(n_batches, -1) - disp_v_flat = disp_v.transpose(1, 2).reshape(n_batches, -1) - disp_n_flat = disp_n.transpose(1, 2).reshape(n_batches, -1) - grid_size = g_nx * g_ny * 3 + disp_u = disp_u_all[:, grid_pos, :g_ny, :g_nx] * scale_s + disp_v = disp_v_all[:, grid_pos, :g_ny, :g_nx] * scale_s + disp_n = disp_n_all[:, grid_pos, :g_ny, :g_nx] * scale_s + # (B, g_ny, g_nx) reshapes directly to the probe flat index iy*nx+ix -- no transpose. + disp_u_flat = disp_u.reshape(n_batches, -1) + disp_v_flat = disp_v.reshape(n_batches, -1) + disp_n_flat = disp_n.reshape(n_batches, -1) + grid_size = g_ny * g_nx * 3 out_block = grid_dilate_out_buffer[:, :grid_size] tangent_u = grid_tangent_u[sensor_idx] tangent_v = grid_tangent_v[sensor_idx] normal = grid_normal[sensor_idx] + # Zero inactive filler probes (probe_radius == 0): they are non-sources, but the FFT still smears + # neighbour dilation into their cells, so mask the per-probe write-back. + active = (probe_radii[probe_start : probe_start + g_ny * g_nx] > 0.0).to(disp_u_flat.dtype) for k in range(3): out_block[:, k:grid_size:3] = ( disp_u_flat * tangent_u[k] + disp_v_flat * tangent_v[k] + disp_n_flat * normal[k] - ) + ) * active output[cache_start : cache_start + grid_size].copy_(out_block.T) @dataclass -class ElastomerTaxelSensorMetadata(PointCloudTactileSharedMetadata, ProbesWithNormalSensorMetadataMixin): +class ElastomerTaxelSensorMetadata( + ViscoelasticHysteresisMetadataMixin, + GridFFTConvMetadataMixin, + PointCloudTactileSharedMetadata, + ProbesWithNormalSensorMetadataMixin, +): track_geom_idx: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) track_geom_active_envs_mask: torch.Tensor = make_tensor_field((0, 0), dtype_factory=lambda: gs.tc_bool) sensor_track_geom_start: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) @@ -1490,6 +1465,7 @@ class ElastomerTaxelSensorMetadata(PointCloudTactileSharedMetadata, ProbesWithNo lambda_s: 
torch.Tensor = make_tensor_field((0,)) dilate_scale: torch.Tensor = make_tensor_field((0,)) shear_scale: torch.Tensor = make_tensor_field((0,)) + normal_exponent: torch.Tensor = make_tensor_field((0,)) elastomer_contact_sdf_enter: torch.Tensor = make_tensor_field((0,)) elastomer_contact_sdf_exit: torch.Tensor = make_tensor_field((0,)) @@ -1504,27 +1480,15 @@ class ElastomerTaxelSensorMetadata(PointCloudTactileSharedMetadata, ProbesWithNo # stale surface_initialized / surface_entry_pos for points the BVH skipped this step. surface_candidate_buf: torch.Tensor = make_tensor_field((0, 0), dtype_factory=lambda: gs.tc_bool) - is_grid: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_bool) + # Per-sensor flag selecting the FFT dilation path vs the direct (non-grid) dilation kernel. use_grid_fft: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_bool) - grid_n: torch.Tensor = make_tensor_field((0, 2), dtype_factory=lambda: gs.tc_int) - grid_spacing: torch.Tensor = make_tensor_field((0, 2)) + # Per-grid-FFT-sensor tangent basis, consumed by the dilation write-back. ``grid_fft_meta`` tuples for this + # sensor are ``(sensor_idx, g_ny, g_nx, probe_start, cache_start, lambda_d, spacing_u, spacing_v)``. grid_normal: torch.Tensor = make_tensor_field((0, 3)) grid_tangent_u: torch.Tensor = make_tensor_field((0, 3)) grid_tangent_v: torch.Tensor = make_tensor_field((0, 3)) - # Stacked complex FFT kernels for all grid-FFT sensors, shape (n_grid, 3, fft_max_n[0], - # fft_max_n[1]). All sensors share fft_max_n so torch.fft.fft2 batches across the grid axis - # in a single launch. When a new grid sensor expands fft_max_n at build time, prior sensors' - # kernels are recomputed at the new size so the stack stays uniform. 
- fft_grid_kernels_stacked: torch.Tensor = make_tensor_field((0, 0, 0, 0), dtype_factory=lambda: torch.complex64) - fft_depth_buffer: torch.Tensor = make_tensor_field((0, 0, 0, 0)) + # Scratch for the per-sensor tangent-decomposition write-back, lazily grown to the largest grid. grid_dilate_out_buffer: torch.Tensor = make_tensor_field((0, 0)) - # Per-grid-FFT-sensor metadata captured at build time. Tuple fields: - # (sensor_idx, g_nx, g_ny, probe_start, cache_start, lambda_d, spacing_u, spacing_v). - # Indexed positionally to match rows of fft_grid_kernels_stacked / sensor axis of - # fft_depth_buffer. Iterating it directly avoids per-step .item() device syncs. - fft_grid_meta: list[tuple[int, int, int, int, int, float, float, float]] = field(default_factory=list) - # Global max FFT size across all grid sensors. Mutated only at build time. - fft_max_n: tuple[int, int] = (0, 0) # True iff at least one configured ElastomerTaxel has shear_scale > 0. Set during build by OR-ing # each sensor's value, so per-step gating avoids an O(n_sensors) reduction + device sync. 
@@ -1532,6 +1496,7 @@ class ElastomerTaxelSensorMetadata(PointCloudTactileSharedMetadata, ProbesWithNo class ElastomerTaxelSensor( + ViscoelasticHysteresisMixin[ElastomerTaxelSensorMetadata], PointCloudTactileSensorMixin[ElastomerTaxelSensorMetadata], ProbesWithNormalSensorMixin[ElastomerTaxelSensorMetadata], RigidSensorMixin[ElastomerTaxelSensorMetadata], @@ -1539,20 +1504,15 @@ class ElastomerTaxelSensor( ): def __init__(self, sensor_options: ElastomerTaxelSensorOptions, sensor_idx: int, sensor_manager: "SensorManager"): super().__init__(sensor_options, sensor_idx, sensor_manager) - - self._is_grid = self._probe_local_pos.ndim > 2 - self._shape = self._probe_local_pos.shape[:-1] - - (probe_pos, probe_normals, use_grid_fft, grid_normal, grid_tangent_u, grid_tangent_v, grid_spacing) = ( - _normalize_elastomer_probe_layout( + # FFT-grid eligibility check (flat pos/normals are already populated by the base mixins). + is_grid = len(self._probe_layout_shape) == 2 + _, _, self._use_grid_fft, grid_normal, grid_tangent_u, grid_tangent_v, grid_spacing = ( + normalize_grid_probe_layout( np.asarray(sensor_options.probe_local_pos, dtype=gs.np_float), np.asarray(sensor_options.probe_local_normal, dtype=gs.np_float), - self._is_grid, + is_grid, ) ) - self._probe_local_pos = torch.tensor(probe_pos, dtype=gs.tc_float, device=gs.device) - self._probe_local_normal = torch.tensor(probe_normals, dtype=gs.tc_float, device=gs.device) - self._use_grid_fft = use_grid_fft self._grid_normal = torch.tensor(grid_normal, dtype=gs.tc_float, device=gs.device) self._grid_tangent_u = torch.tensor(grid_tangent_u, dtype=gs.tc_float, device=gs.device) self._grid_tangent_v = torch.tensor(grid_tangent_v, dtype=gs.tc_float, device=gs.device) @@ -1613,11 +1573,12 @@ def build(self): self._shared_metadata.dilate_scale = concat_with_tensor( self._shared_metadata.dilate_scale, float(self._options.dilate_scale), expand=(1,) ) + self._shared_metadata.normal_exponent = concat_with_tensor( + 
self._shared_metadata.normal_exponent, float(self._options.normal_exponent), expand=(1,) + ) self._shared_metadata.shear_scale = concat_with_tensor( self._shared_metadata.shear_scale, float(self._options.shear_scale), expand=(1,) ) - if float(self._options.shear_scale) > 0.0: - self._shared_metadata.any_shear = True self._shared_metadata.elastomer_contact_sdf_enter = concat_with_tensor( self._shared_metadata.elastomer_contact_sdf_enter, float(self._options.elastomer_contact_sdf_enter), @@ -1628,6 +1589,8 @@ def build(self): float(self._options.elastomer_contact_sdf_exit), expand=(1,), ) + if float(self._options.shear_scale) > 0.0: + self._shared_metadata.any_shear = True self._shared_metadata.probe_depth_buf = torch.zeros( (B, self._shared_metadata.total_n_probes), dtype=gs.tc_float, device=gs.device @@ -1648,61 +1611,40 @@ def build(self): (B, total_n_surface), dtype=gs.tc_bool, device=gs.device ) - self._shared_metadata.is_grid = concat_with_tensor(self._shared_metadata.is_grid, self._is_grid, expand=(1,)) self._shared_metadata.use_grid_fft = concat_with_tensor( self._shared_metadata.use_grid_fft, self._use_grid_fft, expand=(1,) ) - grid_n = torch.tensor((0, 0), dtype=gs.tc_int, device=gs.device) - grid_spacing = torch.tensor((0.0, 0.0), dtype=gs.tc_float, device=gs.device) grid_normal = torch.zeros(3, dtype=gs.tc_float, device=gs.device) grid_tangent_u = torch.zeros(3, dtype=gs.tc_float, device=gs.device) grid_tangent_v = torch.zeros(3, dtype=gs.tc_float, device=gs.device) if self._use_grid_fft: - nx, ny = int(self._shape[1]), int(self._shape[0]) - grid_n = torch.tensor((nx, ny), dtype=gs.tc_int, device=gs.device) - grid_spacing = self._grid_spacing + nx, ny = int(self._probe_layout_shape[1]), int(self._probe_layout_shape[0]) grid_normal = self._grid_normal grid_tangent_u = self._grid_tangent_u grid_tangent_v = self._grid_tangent_v - spacing_u, spacing_v = float(grid_spacing[0].item()), float(grid_spacing[1].item()) - this_fft_n = tuple(_next_pow2(2 * n - 1) for 
n in (nx, ny)) + spacing_u, spacing_v = float(self._grid_spacing[0].item()), float(self._grid_spacing[1].item()) + # FFT size is (ny, nx) row-major. Sizing each axis to ``2n - 1`` (the full linear-convolution support) + # rounded up to a power of 2 guarantees zero circular wraparound regardless of the dilation kernel's + # decay -- the ``x*g`` / ``y*g`` first-moment kernels decay slower than the Gaussian itself. + this_fft_n = (next_pow2(2 * ny - 1), next_pow2(2 * nx - 1)) cache_start_py = int(self._shared_metadata.sensor_cache_start[self._idx].item()) - self._shared_metadata.fft_grid_meta.append( - ( + register_grid_fft_sensor( + self._shared_metadata, + meta_entry=( self._idx, - nx, ny, + nx, self._probe_start_idx, cache_start_py, float(self._options.lambda_d), spacing_u, spacing_v, - ) - ) - - # Expand the global FFT size if this sensor needs more padding. When that happens, all - # prior grid sensors' kernels are rebuilt at the new size (their FFTs depend on the - # transform length, so frequency-domain padding wouldn't be equivalent). - prev_max = self._shared_metadata.fft_max_n - new_max = (max(prev_max[0], this_fft_n[0]), max(prev_max[1], this_fft_n[1])) - self._shared_metadata.fft_max_n = new_max - n_grid = len(self._shared_metadata.fft_grid_meta) - stacked = torch.empty( - (n_grid, 3, new_max[0], new_max[1]), - dtype=torch.complex64, - device=gs.device, - ) - for grid_pos, (_, _, _, _, _, lam_d, sp_u, sp_v) in enumerate(self._shared_metadata.fft_grid_meta): - stacked[grid_pos] = _precompute_hydroshear_dilate_kernel_fft( - lam_d, (sp_u, sp_v), new_max, gs.device, gs.tc_float - ) - self._shared_metadata.fft_grid_kernels_stacked = stacked - - # fft_depth_buffer is keyed by grid-sensor position (not raw sensor i_s), sized at the - # current global max FFT size. Reallocate every time we add a grid sensor. 
- self._shared_metadata.fft_depth_buffer = torch.zeros( - (B, n_grid, new_max[0], new_max[1]), dtype=gs.tc_float, device=gs.device + ), + this_fft_n=this_fft_n, + kernel_builder=_dilate_kernel_builder, + n_buffer_channels=0, + batch_size=B, ) grid_size = nx * ny * 3 out_buf = self._shared_metadata.grid_dilate_out_buffer @@ -1713,10 +1655,6 @@ def build(self): device=gs.device, ) - self._shared_metadata.grid_n = concat_with_tensor(self._shared_metadata.grid_n, grid_n, expand=(1, 2)) - self._shared_metadata.grid_spacing = concat_with_tensor( - self._shared_metadata.grid_spacing, grid_spacing, expand=(1, 2) - ) self._shared_metadata.grid_normal = concat_with_tensor( self._shared_metadata.grid_normal, grid_normal, expand=(1, 3) ) @@ -1728,7 +1666,7 @@ def build(self): ) def _get_return_format(self) -> tuple[int, ...]: - return (self._n_probes, 3) + return (*self._probe_layout_shape, 3) @classmethod def _get_cache_dtype(cls) -> torch.dtype: @@ -1742,6 +1680,27 @@ def reset(cls, shared_metadata: ElastomerTaxelSensorMetadata, shared_ground_trut # implicitly invalidated by clearing it; surface_candidate_buf is .zero_()'d at step start. shared_metadata.surface_initialized_buf[envs_idx, :] = False + @classmethod + def _apply_transform( + cls, + shared_metadata: ElastomerTaxelSensorMetadata, + data: torch.Tensor, + timeline: "TensorRingBuffer", + *, + is_measured: bool, + ): + super()._apply_transform(shared_metadata, data, timeline, is_measured=is_measured) + if not is_measured: + return + # ElastomerTaxel's kernel writes a single output used for both GT and measured (measured is .copy_'d from + # GT), so per-probe gain is applied here as a post-step multiplication on the measured branch only. + # Approximation note: tangential dilation and shear scale linearly with gain (exact), but the H^2 + # normal-dilation term ideally scales as gain^2 -- here we apply gain^1 across all components. 
For typical + # gains near 1 this is a small error; for large deviations the normal component will be slightly off. + cls._maybe_build_cache_col_probe_idx(shared_metadata, data) + gain_per_col = shared_metadata.probe_gains[:, shared_metadata.cache_col_probe_idx] + data.mul_(gain_per_col) + @classmethod def _update_current_timestep_data( cls, @@ -1760,6 +1719,7 @@ def _update_current_timestep_data( _kernel_elastomer_probe_depth( shared_metadata.probe_positions, shared_metadata.probe_sensor_idx, + shared_metadata.probe_radii, shared_metadata.links_idx, shared_metadata.sensor_track_geom_start, shared_metadata.sensor_track_geom_n, @@ -1776,22 +1736,26 @@ def _update_current_timestep_data( shared_metadata.probe_positions, shared_metadata.probe_local_normal, shared_metadata.probe_sensor_idx, + shared_metadata.probe_radii, shared_metadata.sensor_cache_start, shared_metadata.sensor_probe_start, shared_metadata.n_probes_per_sensor, shared_metadata.lambda_d, shared_metadata.dilate_scale, + shared_metadata.normal_exponent, shared_metadata.probe_depth_buf, current_ground_truth_data_T, ) # FFT runs after the qd dilate kernel: on Metal, write-only kernel outputs zero unwritten slots on copy-back, # which would erase the grid range the FFT just wrote. 
_elastomer_taxel_grid_fft_dilate( - shared_metadata.fft_grid_meta, - shared_metadata.fft_grid_kernels_stacked, + shared_metadata.grid_fft_meta, + shared_metadata.grid_fft_kernels_stacked, shared_metadata.probe_depth_buf, - shared_metadata.fft_depth_buffer, + shared_metadata.probe_radii, + shared_metadata.grid_fft_buffer, shared_metadata.dilate_scale, + shared_metadata.normal_exponent, shared_metadata.grid_normal, shared_metadata.grid_tangent_u, shared_metadata.grid_tangent_v, @@ -1844,6 +1808,7 @@ def _update_current_timestep_data( shared_metadata.probe_positions, shared_metadata.probe_local_normal, shared_metadata.probe_sensor_idx, + shared_metadata.probe_radii, shared_metadata.sensor_cache_start, shared_metadata.sensor_probe_start, shared_metadata.sensor_pc_start, @@ -1863,6 +1828,10 @@ def _update_current_timestep_data( measured.copy_(current_ground_truth_data_T.T) def _draw_debug(self, context: "RasterizerContext"): - self._draw_debug_probes( - context, lambda envs_idx: tensor_to_array(torch.linalg.norm(self.read_ground_truth(envs_idx), dim=-1)) - ) + def mask(envs_idx): + disp = self.read_ground_truth(envs_idx) + if self._options.history_length > 0: + disp = disp.select(1 if self._manager._sim.n_envs > 0 else 0, -1) + return torch.linalg.norm(disp, dim=-1) >= gs.EPS + + self._draw_debug_probes(context, self._tactile_color_groups_fn(mask)) diff --git a/genesis/engine/sensors/probe.py b/genesis/engine/sensors/probe.py index 92964c69cd..0adda1a94c 100644 --- a/genesis/engine/sensors/probe.py +++ b/genesis/engine/sensors/probe.py @@ -1,5 +1,5 @@ -from dataclasses import dataclass -from typing import TYPE_CHECKING, Generic, TypeVar +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Callable, Generic, TypeVar import numpy as np import quadrants as qd @@ -7,10 +7,12 @@ import genesis as gs import genesis.utils.geom as gu +from genesis.options.sensors.tactile import TactileProbeSensorOptionsMixin from genesis.utils.misc import 
concat_with_tensor, make_tensor_field, tensor_to_array if TYPE_CHECKING: from genesis.options.sensors.options import SensorOptions + from genesis.utils.ring_buffer import TensorRingBuffer from genesis.vis.rasterizer_context import RasterizerContext from .sensor_manager import SensorManager @@ -35,25 +37,65 @@ class ProbeSensorMetadataMixin: probe_positions: torch.Tensor = make_tensor_field((0, 3)) probe_radii: torch.Tensor = make_tensor_field((0,)) probe_radii_noise: torch.Tensor = make_tensor_field((0,)) + has_any_probe_radius_noise: bool = False + has_any_probe_gain: bool = False n_probes_per_sensor: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) probe_sensor_idx: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) sensor_cache_start: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) sensor_probe_start: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) - # Class-level scratch for the kernel-writes-(cols, B) -> ring-slot-(B, cols) transpose-copy pattern. Lazy-allocated - # on first hot-path call to avoid per-step `torch.empty_like` allocations. 
+ measured_scratch_T: torch.Tensor = make_tensor_field((0, 0)) + probe_gains: torch.Tensor = make_tensor_field((0, 0)) + probe_gain_resample_low: torch.Tensor = make_tensor_field((0,)) + probe_gain_resample_high: torch.Tensor = make_tensor_field((0,)) + probe_has_gain_resample: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_bool) + any_gain_resample: bool = False + + dead_taxel_mask: torch.Tensor = make_tensor_field((0, 0), dtype_factory=lambda: gs.tc_bool) + dead_taxel_values: torch.Tensor = make_tensor_field((0, 0)) + dead_taxel_probability: torch.Tensor = make_tensor_field((0,)) + dead_taxel_value_low: torch.Tensor = make_tensor_field((0,)) + dead_taxel_value_high: torch.Tensor = make_tensor_field((0,)) + any_dead_taxel: bool = False + dead_mask_per_col: torch.Tensor = make_tensor_field((0, 0), dtype_factory=lambda: gs.tc_bool) + dead_values_per_col: torch.Tensor = make_tensor_field((0, 0)) + dead_dirty: bool = True + cache_col_probe_idx: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: torch.long) + cache_col_n_channel_groups: list[int] = field(default_factory=list) + ProbeSensorSharedMetadataT = TypeVar("ProbeSensorSharedMetadataT", bound=ProbeSensorMetadataMixin) +def get_measured_bufs( + shared_metadata: "ProbeSensorMetadataMixin", + current_ground_truth_data_T: torch.Tensor, + measured_data_timeline: "TensorRingBuffer", +) -> tuple[torch.Tensor, torch.Tensor]: + current_ground_truth_data_T.zero_() + measured_slot = measured_data_timeline.at(0, copy=False) + measured_slot.zero_() + if shared_metadata.measured_scratch_T.shape != current_ground_truth_data_T.shape: + shared_metadata.measured_scratch_T = torch.empty_like(current_ground_truth_data_T) + return measured_slot, shared_metadata.measured_scratch_T + + class ProbeSensorMixin(Generic[ProbeSensorSharedMetadataT]): """Shared logic for registering this sensor's probes in ``ProbeSensorMetadataMixin`` fields.""" + # Number of channel groups per probe in the cache layout. 
Used by the per-cache-col probe-index builder. + _taxel_channel_groups: int = 1 + def __init__(self, sensor_options: "SensorOptions", sensor_idx: int, sensor_manager: "SensorManager"): - # `_get_return_format` runs inside `super().__init__`, so `_probe_local_pos` / `_n_probes` must already be set. - self._probe_local_pos = torch.tensor(sensor_options.probe_local_pos, dtype=gs.tc_float, device=gs.device) - self._n_probes = int(np.prod(self._probe_local_pos.shape[:-1])) + # `_get_return_format` runs inside `super().__init__`, so `_probe_local_pos` / `_n_probes` / + # `_probe_layout_shape` must already be set. ``_probe_layout_shape`` is the input layout sans the trailing + # ``xyz`` axis: ``(N,)`` for a flat probe list or ``(M, N)`` for a 2D grid. Probe-axis storage is flat. + raw_pos = torch.tensor(sensor_options.probe_local_pos, dtype=gs.tc_float, device=gs.device) + self._probe_layout_shape = raw_pos.shape[:-1] + self._n_probes = int(np.prod(self._probe_layout_shape)) + self._probe_local_pos = raw_pos.reshape(self._n_probes, 3).contiguous() + self._debug_objects: list = [] super().__init__(sensor_options, sensor_idx, sensor_manager) def build(self) -> None: @@ -81,7 +123,9 @@ def build(self) -> None: if isinstance(self._options.probe_radius, float): probe_radii = torch.full((self._n_probes,), self._options.probe_radius, dtype=gs.tc_float, device=gs.device) else: - probe_radii = torch.tensor(self._options.probe_radius, dtype=gs.tc_float, device=gs.device) + probe_radii = torch.tensor(self._options.probe_radius, dtype=gs.tc_float, device=gs.device).reshape( + self._n_probes + ) self._shared_metadata.probe_radii = concat_with_tensor( self._shared_metadata.probe_radii, probe_radii, expand=(self._n_probes,) ) @@ -90,6 +134,186 @@ def build(self) -> None: torch.full((self._n_probes,), self._options.probe_radius_noise, dtype=gs.tc_float, device=gs.device), expand=(self._n_probes,), ) + if self._options.probe_radius_noise > 0.0: + 
self._shared_metadata.has_any_probe_radius_noise = True + + # Tactile-specific options (probe_gain, dead_taxel_*) live on ``TactileProbeSensorOptionsMixin``; generic + # probe sensors (e.g. SurfaceDistanceProbe) don't carry them and register defaults (gain 1, no dead). + B = self._manager._sim._B + opts = self._options + is_tactile = isinstance(opts, TactileProbeSensorOptionsMixin) + # Initial per-probe gain (probe_gain may be scalar or per-probe array). + gain_value = opts.probe_gain if is_tactile else 1.0 + if isinstance(gain_value, float) or isinstance(gain_value, int): + init_gain = torch.full((B, self._n_probes), float(gain_value), dtype=gs.tc_float, device=gs.device) + if float(gain_value) != 1.0: + self._shared_metadata.has_any_probe_gain = True + else: + init_gain = ( + torch.tensor(gain_value, dtype=gs.tc_float, device=gs.device) + .reshape(self._n_probes) + .unsqueeze(0) + .expand(B, self._n_probes) + .contiguous() + ) + if not bool((init_gain == 1.0).all().item()): + self._shared_metadata.has_any_probe_gain = True + self._shared_metadata.probe_gains = concat_with_tensor( + self._shared_metadata.probe_gains, init_gain, expand=(B, self._n_probes), dim=1 + ) + + # Per-probe gain resample range (constant across envs). When option is None, write zeros + has_resample=False; + # the reset hook gates on ``has_gain_resample`` per probe. + resample_range = opts.probe_gain_resample_range if is_tactile else None + if resample_range is None: + low, high = 0.0, 0.0 + has_resample = False + else: + low, high = float(resample_range[0]), float(resample_range[1]) + has_resample = True + self._shared_metadata.any_gain_resample = True + # Resampled gain is generally != 1, so the measured branch can't be assumed equal to GT. 
+ self._shared_metadata.has_any_probe_gain = True + self._shared_metadata.probe_gain_resample_low = concat_with_tensor( + self._shared_metadata.probe_gain_resample_low, + torch.full((self._n_probes,), low, dtype=gs.tc_float, device=gs.device), + expand=(self._n_probes,), + ) + self._shared_metadata.probe_gain_resample_high = concat_with_tensor( + self._shared_metadata.probe_gain_resample_high, + torch.full((self._n_probes,), high, dtype=gs.tc_float, device=gs.device), + expand=(self._n_probes,), + ) + self._shared_metadata.probe_has_gain_resample = concat_with_tensor( + self._shared_metadata.probe_has_gain_resample, + torch.full((self._n_probes,), has_resample, dtype=gs.tc_bool, device=gs.device), + expand=(self._n_probes,), + ) + + # Per-probe dead taxel configuration (constant across envs). + dead_prob = float(opts.dead_taxel_probability) if is_tactile else 0.0 + dead_range = opts.dead_taxel_value_range if is_tactile else (0.0, 0.0) + s_low, s_high = float(dead_range[0]), float(dead_range[1]) + self._shared_metadata.dead_taxel_probability = concat_with_tensor( + self._shared_metadata.dead_taxel_probability, + torch.full((self._n_probes,), dead_prob, dtype=gs.tc_float, device=gs.device), + expand=(self._n_probes,), + ) + self._shared_metadata.dead_taxel_value_low = concat_with_tensor( + self._shared_metadata.dead_taxel_value_low, + torch.full((self._n_probes,), s_low, dtype=gs.tc_float, device=gs.device), + expand=(self._n_probes,), + ) + self._shared_metadata.dead_taxel_value_high = concat_with_tensor( + self._shared_metadata.dead_taxel_value_high, + torch.full((self._n_probes,), s_high, dtype=gs.tc_float, device=gs.device), + expand=(self._n_probes,), + ) + if dead_prob > 0.0: + self._shared_metadata.any_dead_taxel = True + # Allocate / extend the per-(env, probe) dead buffers to the new total probe count. 
+ self._shared_metadata.dead_taxel_mask = torch.zeros( + (B, self._shared_metadata.total_n_probes), dtype=gs.tc_bool, device=gs.device + ) + self._shared_metadata.dead_taxel_values = torch.zeros( + (B, self._shared_metadata.total_n_probes), dtype=gs.tc_float, device=gs.device + ) + # Invalidate the lazy cache-col probe index; rebuilt on next dead apply. + self._shared_metadata.cache_col_probe_idx = torch.empty((0,), dtype=torch.long, device=gs.device) + self._shared_metadata.cache_col_n_channel_groups.append(self._taxel_channel_groups) + + @classmethod + def reset(cls, shared_metadata, shared_ground_truth_cache, envs_idx): + super().reset(shared_metadata, shared_ground_truth_cache, envs_idx) + # Resample per-(env, probe) gain for probes whose sensor configured a resample range. + if shared_metadata.any_gain_resample and shared_metadata.probe_gains.numel() > 0: + mask = shared_metadata.probe_has_gain_resample.unsqueeze(0) # (1, total_n_probes) + low = shared_metadata.probe_gain_resample_low.unsqueeze(0) + high = shared_metadata.probe_gain_resample_high.unsqueeze(0) + sub = shared_metadata.probe_gains[envs_idx] + new_gain = torch.rand_like(sub) * (high - low) + low + shared_metadata.probe_gains[envs_idx] = torch.where(mask, new_gain, sub) + # Resample dead mask + values per env for affected probes. 
+ if shared_metadata.any_dead_taxel and shared_metadata.dead_taxel_mask.numel() > 0: + prob = shared_metadata.dead_taxel_probability.unsqueeze(0) # (1, total_n_probes) + n_envs = shared_metadata.dead_taxel_mask[envs_idx].shape[0] + rolls = torch.rand((n_envs, shared_metadata.total_n_probes), device=gs.device, dtype=gs.tc_float) + new_mask = rolls < prob + shared_metadata.dead_taxel_mask[envs_idx] = new_mask + low = shared_metadata.dead_taxel_value_low.unsqueeze(0) + high = shared_metadata.dead_taxel_value_high.unsqueeze(0) + uniforms = torch.rand((n_envs, shared_metadata.total_n_probes), device=gs.device, dtype=gs.tc_float) + shared_metadata.dead_taxel_values[envs_idx] = uniforms * (high - low) + low + # The per-cache-column broadcast is now stale; rebuilt on the next `_apply_hardware_imperfections`. + shared_metadata.dead_dirty = True + + @gs.assert_built + def set_probe_gain(self, value, envs_idx=None): + """Set the per-probe measured-branch contact-depth gain for the given envs. + + ``value`` may be a scalar (broadcast to all probes of this sensor), or an array of length ``n_probes``. + Affects only the probes registered by this sensor instance. + """ + envs_idx = self._sanitize_envs_idx(envs_idx) + probe_start = int(self._shared_metadata.sensor_probe_start[self._idx].item()) + probe_slice = slice(probe_start, probe_start + self._n_probes) + if isinstance(value, (int, float)): + row = torch.full((len(envs_idx), self._n_probes), float(value), dtype=gs.tc_float, device=gs.device) + else: + t = torch.as_tensor(value, dtype=gs.tc_float, device=gs.device).reshape(-1) + if t.numel() != self._n_probes: + gs.raise_exception(f"set_probe_gain expected {self._n_probes} values, got {t.numel()}.") + row = t.unsqueeze(0).expand(len(envs_idx), self._n_probes).contiguous() + self._shared_metadata.probe_gains[envs_idx, probe_slice] = row + # Conservatively mark gain in use (a user-set gain may be non-unit); never reset to False. 
+ self._shared_metadata.has_any_probe_gain = True + + @classmethod + def _apply_hardware_imperfections(cls, shared_metadata, measured_slot_0): + super()._apply_hardware_imperfections(shared_metadata, measured_slot_0) + if not shared_metadata.any_dead_taxel: + return + cls._maybe_build_cache_col_probe_idx(shared_metadata, measured_slot_0) + # The per-(env, probe) dead state only changes on reset; broadcast it to per-(env, cache_col) layout once + # (when dirty) instead of gathering every step. + if shared_metadata.dead_dirty or shared_metadata.dead_mask_per_col.shape != measured_slot_0.shape: + idx = shared_metadata.cache_col_probe_idx # (total_cache_size,) + shared_metadata.dead_mask_per_col = shared_metadata.dead_taxel_mask[:, idx] + shared_metadata.dead_values_per_col = shared_metadata.dead_taxel_values[:, idx].to( + dtype=measured_slot_0.dtype + ) + shared_metadata.dead_dirty = False + torch.where( + shared_metadata.dead_mask_per_col, + shared_metadata.dead_values_per_col, + measured_slot_0, + out=measured_slot_0, + ) + + @classmethod + def _maybe_build_cache_col_probe_idx(cls, shared_metadata, tensor): + n_cols = tensor.shape[1] + if shared_metadata.cache_col_probe_idx.shape == (n_cols,): + return + sizes = shared_metadata.cache_sizes + n_probes_per = shared_metadata.n_probes_per_sensor.tolist() + probe_starts = shared_metadata.sensor_probe_start.tolist() + groups = shared_metadata.cache_col_n_channel_groups + # Each sensor's cache columns are ordered (group, probe, component); only the probe axis indexes a probe, + # so its slice is a strided arange: arange(n_p) repeated per-component, tiled over the k groups. 
+ per_sensor = [] + for i_s, cache_size in enumerate(sizes): + n_p = n_probes_per[i_s] + if n_p == 0: + continue + k = groups[i_s] if i_s < len(groups) else 1 + components_per_group = cache_size // (k * n_p) + cols = torch.arange(n_p, dtype=torch.long, device=gs.device) + cols = cols.repeat_interleave(components_per_group).repeat(k) + per_sensor.append(cols + probe_starts[i_s]) + shared_metadata.cache_col_probe_idx = ( + torch.cat(per_sensor) if per_sensor else torch.empty((0,), dtype=torch.long, device=gs.device) + ) @property def probe_local_pos(self) -> torch.Tensor: @@ -128,6 +352,127 @@ def _compute_probes_world_pos(self, context: "RasterizerContext"): ) return envs_idx, n_debug_envs, env_offsets, probe_world.reshape(-1, 3) + def _draw_probe_spheres( + self, + context: "RasterizerContext", + probe_world: np.ndarray, + rgb, + probe_radii: np.ndarray | None = None, + probe_radii_noise: np.ndarray | None = None, + ) -> list: + """ + Draw a small opaque center sphere and a translucent outer sensing sphere at each ``probe_world`` position. + + ``probe_world`` is ``(N, 3)`` (already tiled over rendered envs). ``probe_radii`` and ``probe_radii_noise`` + are the matching ``(N,)`` per-position nominal sensing radius and additive uniform noise; both default to + the per-probe values from shared metadata, tiled to match ``probe_world``. When noise is positive, each + outer sphere is drawn at a fresh sample ``clip(r + U(-noise, +noise), 0, inf)`` rounded to the nearest + ``noise`` magnitude so the unique-radius batches stay small. Returns the created debug objects. 
+ """ + options = self._options + rgb = tuple(float(c) for c in rgb) + center_color = (*rgb, 1.0) + objs = [ + context.draw_debug_spheres( + poss=probe_world, + radius=float(options.debug_probe_center_radius), + color=center_color, + ) + ] + if options.debug_probe_sphere_opacity <= 0.0: + return objs + outer_color = (*rgb, float(options.debug_probe_sphere_opacity)) + probe_start = int(self._shared_metadata.sensor_probe_start[self._idx].item()) + probe_slice = slice(probe_start, probe_start + self._n_probes) + n_tile = probe_world.shape[0] // self._n_probes if self._n_probes > 0 else 0 + n_tile = max(n_tile, 1) + if probe_radii is None: + per_probe = tensor_to_array(self._shared_metadata.probe_radii[probe_slice]).reshape(-1) + probe_radii = np.tile(per_probe, n_tile) + if probe_radii_noise is None: + per_probe_noise = tensor_to_array(self._shared_metadata.probe_radii_noise[probe_slice]).reshape(-1) + probe_radii_noise = np.tile(per_probe_noise, n_tile) + nz = probe_radii_noise > 0.0 + if nz.any(): + jitter = np.random.uniform(-1.0, 1.0, size=probe_radii.shape) * probe_radii_noise + noisy = np.maximum(0.0, probe_radii + jitter) + rounded = probe_radii.astype(float, copy=True) + rounded[nz] = np.round(noisy[nz] / probe_radii_noise[nz]) * probe_radii_noise[nz] + probe_radii = rounded + for r in np.unique(probe_radii): + if r <= 0.0: + continue + mask = probe_radii == r + objs.append( + context.draw_debug_spheres( + poss=probe_world[mask], + radius=float(r), + color=outer_color, + ) + ) + return objs + + def _draw_debug_probes( + self, + context: "RasterizerContext", + color_groups_fn: Callable[[list[int] | None], list[tuple]] | None = None, + ) -> tuple[list[int] | None, int, np.ndarray | None]: + """ + Generic per-probe debug renderer. Clears prior debug objects, then for each provided color group draws the + two-sphere marker (small opaque center + translucent outer sensing sphere) on the selected probe positions. 
+ + ``color_groups_fn(envs_idx)`` returns a list of ``(rgb, mask)`` pairs, where ``rgb`` is a length-3 sequence + and ``mask`` is a flat ``(n_debug_envs * n_probes,)`` bool array (or tensor castable to bool) selecting + which probe positions take that color. Passing ``None`` falls back to a single group covering every probe + in the sensor's ``debug_probe_color`` (no contact-state assumption -- usable by any probe sensor). + + Returns ``(envs_idx, n_debug_envs, env_offsets)`` so subclasses can extend the drawing with additional + debug geometry without recomputing the env layout. + """ + for obj in self._debug_objects: + context.clear_debug_object(obj) + self._debug_objects.clear() + + envs_idx, n_debug_envs, env_offsets, probe_world = self._compute_probes_world_pos(context) + probe_start = int(self._shared_metadata.sensor_probe_start[self._idx].item()) + probe_slice = slice(probe_start, probe_start + self._n_probes) + n_tile = max(n_debug_envs, 1) + all_radii = np.tile(tensor_to_array(self._shared_metadata.probe_radii[probe_slice]).reshape(-1), n_tile) + all_noise = np.tile(tensor_to_array(self._shared_metadata.probe_radii_noise[probe_slice]).reshape(-1), n_tile) + if color_groups_fn is None: + groups = [(self._options.debug_probe_color, np.ones(probe_world.shape[0], dtype=bool))] + else: + groups = color_groups_fn(envs_idx) + for rgb, mask in groups: + mask_arr = np.asarray(tensor_to_array(mask), dtype=bool).reshape(-1) + (probes_idx,) = np.nonzero(mask_arr) + if probes_idx.size == 0: + continue + self._debug_objects.extend( + self._draw_probe_spheres( + context, probe_world[probes_idx], rgb, all_radii[probes_idx], all_noise[probes_idx] + ) + ) + return envs_idx, n_debug_envs, env_offsets + + def _tactile_color_groups_fn( + self, get_is_contact_flat: Callable[[list[int] | None], object] + ) -> Callable[[list[int] | None], list[tuple]]: + """ + Build a ``color_groups_fn`` for the common tactile split: not-in-contact probes get ``debug_probe_color`` + and 
in-contact probes get ``debug_contact_color``. The sensor's options must expose + ``debug_contact_color`` (i.e. inherit ``TactileProbeSensorOptionsMixin``). + """ + + def fn(envs_idx): + is_contact = np.asarray(tensor_to_array(get_is_contact_flat(envs_idx)), dtype=bool).reshape(-1) + return [ + (self._options.debug_probe_color, ~is_contact), + (self._options.debug_contact_color, is_contact), + ] + + return fn + @dataclass class ProbesWithNormalSensorMetadataMixin(ProbeSensorMetadataMixin): @@ -146,9 +491,11 @@ class ProbesWithNormalSensorMixin(ProbeSensorMixin[ProbesWithNormalSensorSharedM def __init__(self, sensor_options: "SensorOptions", sensor_idx: int, sensor_manager: "SensorManager"): super().__init__(sensor_options, sensor_idx, sensor_manager) - self._probe_local_normal = torch.tensor(self._options.probe_local_normal, dtype=gs.tc_float, device=gs.device) - if self._probe_local_normal.ndim == 1: - self._probe_local_normal = self._probe_local_normal.expand(self._n_probes, 3).contiguous() + raw_normal = torch.tensor(self._options.probe_local_normal, dtype=gs.tc_float, device=gs.device) + if raw_normal.ndim == 1: + self._probe_local_normal = raw_normal.expand(self._n_probes, 3).contiguous() + else: + self._probe_local_normal = raw_normal.reshape(self._n_probes, 3).contiguous() def build(self) -> None: super().build() diff --git a/genesis/engine/sensors/surface_distance_probe.py b/genesis/engine/sensors/surface_distance_probe.py index 858719a4fd..66eaa4ad5f 100644 --- a/genesis/engine/sensors/surface_distance_probe.py +++ b/genesis/engine/sensors/surface_distance_probe.py @@ -14,7 +14,12 @@ from genesis.utils.raycast_qd import get_triangle_vertices from .base_sensor import RigidSensorMetadataMixin, RigidSensorMixin, SimpleSensor, SimpleSensorMetadata -from .probe import ProbeSensorMetadataMixin, ProbeSensorMixin, func_noised_probe_radius +from .probe import ( + ProbeSensorMetadataMixin, + ProbeSensorMixin, + func_noised_probe_radius, + get_measured_bufs, +) if 
TYPE_CHECKING: from genesis.utils.ring_buffer import TensorRingBuffer @@ -179,13 +184,12 @@ def _kernel_surface_distance_probe( probe_idx_in_sensor = i_p - sensor_probe_start[i_s] cache_start = sensor_cache_start[i_s] - probe_global_idx = sensor_probe_start[i_s] + probe_idx_in_sensor output_gt[cache_start + probe_idx_in_sensor, i_b] = best_dist_gt output_measured[cache_start + probe_idx_in_sensor, i_b] = best_dist_m for j in qd.static(range(3)): - positions_gt[i_b, probe_global_idx, j] = best_point_gt[j] - positions_measured[i_b, probe_global_idx, j] = best_point_m[j] + positions_gt[i_b, i_p, j] = best_point_gt[j] + positions_measured[i_b, i_p, j] = best_point_m[j] @dataclass @@ -215,7 +219,6 @@ class SurfaceDistanceProbeSensor( def __init__(self, sensor_options: SurfaceDistanceProbeOptions, sensor_idx: int, sensor_manager: "SensorManager"): super().__init__(sensor_options, sensor_idx, sensor_manager) - self._debug_objects: list = [] self._nearest_points_slice: slice | None = None def _get_return_format(self) -> tuple[tuple[int, ...], ...]: @@ -272,13 +275,9 @@ def _update_current_timestep_data( measured_data_timeline: "TensorRingBuffer", ): solver = shared_metadata.solver - current_ground_truth_data_T.zero_() - measured = measured_data_timeline.at(0, copy=False) - measured.zero_() - if shared_metadata.measured_scratch_T.shape != current_ground_truth_data_T.shape: - shared_metadata.measured_scratch_T = torch.empty_like(current_ground_truth_data_T) - measured_cols_b = shared_metadata.measured_scratch_T - + measured, measured_cols_b = get_measured_bufs( + shared_metadata, current_ground_truth_data_T, measured_data_timeline + ) _kernel_surface_distance_probe( shared_metadata.probe_positions, shared_metadata.probe_radii, @@ -316,24 +315,30 @@ def _draw_debug(self, context: "RasterizerContext"): link_pos = self._link.get_pos(env_idx).squeeze() link_quat = self._link.get_quat(env_idx).squeeze() - probe_world = 
tensor_to_array(gu.transform_by_trans_quat(self._probe_local_pos, link_pos, link_quat)) + probe_world = tensor_to_array( + gu.transform_by_trans_quat(self._probe_local_pos.reshape(-1, 3), link_pos, link_quat) + ).reshape(-1, 3) points = tensor_to_array(self.nearest_points[env_idx]).reshape(-1, 3) + rgb = tuple(float(c) for c in self._options.debug_probe_color) + line_color = (*rgb, 1.0) + self._debug_objects.extend(self._draw_probe_spheres(context, probe_world, rgb)) self._debug_objects.append( context.draw_debug_spheres( - poss=np.concatenate([probe_world, points]), - radius=self._options.debug_sphere_radius, - color=self._options.debug_probe_color, + poss=points, + radius=float(self._options.debug_probe_center_radius), + color=line_color, ) ) for i in range(len(probe_world)): - line_obj = context.draw_debug_line( - probe_world[i], - points[i], - radius=self._options.debug_sphere_radius / 4.0, - color=self._options.debug_probe_color, + self._debug_objects.append( + context.draw_debug_line( + probe_world[i], + points[i], + radius=float(self._options.debug_probe_center_radius) / 4.0, + color=line_color, + ) ) - self._debug_objects.append(line_obj) @property def nearest_points(self) -> torch.Tensor: diff --git a/genesis/engine/sensors/tactile_shared.py b/genesis/engine/sensors/tactile_shared.py new file mode 100644 index 0000000000..7613db64a1 --- /dev/null +++ b/genesis/engine/sensors/tactile_shared.py @@ -0,0 +1,296 @@ +import math +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Callable, Generic, TypeVar + +import numpy as np +import torch + +import genesis as gs +from genesis.utils.misc import concat_with_tensor, make_tensor_field + +if TYPE_CHECKING: + from genesis.utils.ring_buffer import TensorRingBuffer + + +_GRID_TOL = 1.0e-5 # Tolerance for grid-regularity / orthogonality / normal-uniformity checks. 
def next_pow2(n: int) -> int:
    """
    Return the smallest power of two that is greater than or equal to ``n``.

    Parameters
    ----------
    n : int
        Requested minimum size. Every value ``<= 1`` (zero and negative values included) maps to ``1``.

    Returns
    -------
    int
        ``2 ** k`` for the smallest non-negative integer ``k`` such that ``2 ** k >= n``.
    """
    if n <= 1:
        return 1
    # For an integer m >= 2, ``(m - 1).bit_length()`` is the exponent of the smallest power of two >= m.
    # ``math.ceil`` leaves integer inputs unchanged; it only matters if a non-integer ``n`` is ever passed.
    return 1 << (math.ceil(n) - 1).bit_length()
def expand_probe_normals(normals: np.ndarray, n_probes: int, probe_shape: tuple[int, ...]) -> np.ndarray:
    """
    Broadcast ``normals`` to a flat ``(n_probes, 3)`` array.

    Parameters
    ----------
    normals : array-like
        A single shared normal of shape ``(3,)``, a layout-shaped ``(*probe_shape, 3)`` array, or an
        already-flat ``(n_probes, 3)`` array. It is converted to ``gs.np_float``.
    n_probes : int
        Number of rows in the returned array.
    probe_shape : tuple of int
        Leading (non-vector) shape of the probe layout, used to recognise layout-shaped input.

    Returns
    -------
    np.ndarray
        A new ``(n_probes, 3)`` array; the result is always a copy.

    Notes
    -----
    Any other input shape is reported through ``gs.raise_exception``. This includes 1-D arrays whose length is
    not 3, which are rejected here rather than being handed to ``np.broadcast_to``.
    """
    normals = np.asarray(normals, dtype=gs.np_float)
    # Require exactly ``(3,)`` for the shared-normal case: a length-1 array would otherwise broadcast silently
    # into all three components, and any other length would fail inside NumPy with an unrelated message.
    if normals.shape == (3,):
        return np.broadcast_to(normals, (n_probes, 3)).copy()
    if normals.shape in ((*probe_shape, 3), (n_probes, 3)):
        return normals.reshape(n_probes, 3).copy()
    gs.raise_exception(
        "probe_local_normal must be one normal or match probe_local_pos shape. "
        f"Got normal shape {normals.shape} for probe shape {probe_shape}."
    )
+ """ + probe_shape = probe_pos.shape[:-1] + flat = probe_pos.reshape(-1, 3) + normals = expand_probe_normals(probe_normals, flat.shape[0], probe_shape) + + normal_norms = np.linalg.norm(normals, axis=1) + if np.any(normal_norms < gs.EPS): + gs.raise_exception("probe_local_normal entries must be non-zero.") + normals = normals / normal_norms[:, None] + + use_grid_fft = False + grid_normal = np.zeros(3, dtype=gs.np_float) + tangent_u = np.zeros(3, dtype=gs.np_float) + tangent_v = np.zeros(3, dtype=gs.np_float) + grid_spacing = np.zeros(2, dtype=gs.np_float) + + if is_grid: + if len(probe_shape) != 2: + gs.raise_exception("Grid probe_local_pos must have shape (ny, nx, 3).") + ny, nx = int(probe_shape[0]), int(probe_shape[1]) + if nx >= 2 and ny >= 2: + grid = probe_pos.reshape(ny, nx, 3) + step_u = grid[0, 1] - grid[0, 0] + step_v = grid[1, 0] - grid[0, 0] + spacing_u = float(np.linalg.norm(step_u)) + spacing_v = float(np.linalg.norm(step_v)) + if spacing_u >= gs.EPS and spacing_v >= gs.EPS: + tangent_u_candidate = (step_u / spacing_u).astype(gs.np_float) + tangent_v_candidate = (step_v / spacing_v).astype(gs.np_float) + normal_candidate = normals[0].astype(gs.np_float, copy=False) + normals_are_uniform = bool(np.all(normals @ normal_candidate >= 1.0 - _GRID_TOL)) + axes_are_orthogonal = abs(float(tangent_u_candidate @ tangent_v_candidate)) <= _GRID_TOL + axes_in_plane = ( + abs(float(tangent_u_candidate @ normal_candidate)) <= _GRID_TOL + and abs(float(tangent_v_candidate @ normal_candidate)) <= _GRID_TOL + ) + expected = ( + grid[0, 0] + + np.arange(nx, dtype=gs.np_float)[None, :, None] * step_u[None, None, :] + + np.arange(ny, dtype=gs.np_float)[:, None, None] * step_v[None, None, :] + ) + is_regular = bool(np.max(np.linalg.norm(grid - expected, axis=-1)) <= _GRID_TOL) + use_grid_fft = normals_are_uniform and axes_are_orthogonal and axes_in_plane and is_regular + if use_grid_fft: + grid_normal = normal_candidate + tangent_u = tangent_u_candidate + tangent_v = 
class ViscoelasticHysteresisMixin(Generic[ViscoelasticHysteresisSharedMetadataT]):
    """
    Sensor mixin adding single-Maxwell-element viscoelastic hysteresis (equilibrium gain normalized to 1).

    Update applied to measured data on every ``_apply_transform`` call::

        alpha   = exp(-dt / tau)
        xi     <- alpha * xi + (x - x_prev)
        output  = x + strength * xi
        x_prev <- x

    For a step input from 0 to X the output first reads ``X * (1 + strength)`` and then relaxes towards ``X``
    with time constant ``tau``. On cyclic input the output overshoots on rising edges and undershoots on
    falling edges.
    """

    _shared_metadata: ViscoelasticHysteresisSharedMetadataT

    def build(self):
        """Append this sensor's hysteresis strength and decay factor to the shared metadata."""
        super().build()
        meta = self._shared_metadata
        # ``getattr`` with a default so sensor classes whose options don't declare the hysteresis fields (e.g.
        # ``ContactProbe``, which inherits the sensor mixin via ``ContactDepthProbeSensor`` but has bool output
        # and isn't a viscoelastic target) still build cleanly with hysteresis disabled.
        strength = float(getattr(self._options, "hysteresis_strength", 0.0))
        tau = float(getattr(self._options, "hysteresis_tau", 0.0))
        if tau > 0.0:
            alpha = math.exp(-self._dt / tau)
        else:
            alpha = 0.0

        meta.hysteresis_strength = concat_with_tensor(meta.hysteresis_strength, strength, expand=(1,))
        meta.hysteresis_alpha = concat_with_tensor(meta.hysteresis_alpha, alpha, expand=(1,))
        if strength > 0.0 and tau > 0.0:
            meta.has_any_hysteresis = True

        # Drop the lazily built per-column rows so the next apply rebuilds them against the final cache width.
        # The per-column state tensors are allocated in that same rebuild, so sensor classes that never enable
        # hysteresis allocate none of them.
        meta.viscoelastic_strength_row = torch.empty((0,), dtype=gs.tc_float, device=gs.device)
        meta.viscoelastic_alpha_row = torch.empty((0,), dtype=gs.tc_float, device=gs.device)

    @classmethod
    def reset(
        cls,
        shared_metadata: ViscoelasticHysteresisSharedMetadataT,
        shared_ground_truth_cache: torch.Tensor,
        envs_idx,
    ):
        """Zero the hysteresis state rows selected by ``envs_idx`` once the state has been allocated."""
        super().reset(shared_metadata, shared_ground_truth_cache, envs_idx)
        if shared_metadata.viscoelastic_xi.numel() == 0:
            # State buffers have not been allocated yet; nothing to clear.
            return
        shared_metadata.viscoelastic_xi[envs_idx] = 0.0
        shared_metadata.viscoelastic_prev_input[envs_idx] = 0.0

    @classmethod
    def _apply_transform(
        cls,
        shared_metadata: ViscoelasticHysteresisSharedMetadataT,
        data: torch.Tensor,
        timeline: "TensorRingBuffer",
        *,
        is_measured: bool,
    ):
        """Apply the hysteresis update in place to ``data``; a no-op unless on the measured branch."""
        super()._apply_transform(shared_metadata, data, timeline, is_measured=is_measured)
        if not (is_measured and shared_metadata.has_any_hysteresis):
            return

        n_envs, n_cols, *_ = data.shape
        if shared_metadata.viscoelastic_strength_row.shape != (n_cols,):
            # First apply after a build: expand the per-sensor parameters to one entry per cache column and
            # allocate zeroed state buffers.
            column_owner = [
                i_sensor for i_sensor, width in enumerate(shared_metadata.cache_sizes) for _ in range(width)
            ]
            owner_idx = torch.tensor(column_owner, dtype=torch.long, device=gs.device)
            shared_metadata.viscoelastic_strength_row = shared_metadata.hysteresis_strength[owner_idx].to(
                dtype=data.dtype
            )
            shared_metadata.viscoelastic_alpha_row = shared_metadata.hysteresis_alpha[owner_idx].to(
                dtype=data.dtype
            )
            state_shape = (n_envs, n_cols)
            shared_metadata.viscoelastic_xi = torch.zeros(state_shape, dtype=data.dtype, device=gs.device)
            shared_metadata.viscoelastic_prev_input = torch.zeros(state_shape, dtype=data.dtype, device=gs.device)

        state = shared_metadata.viscoelastic_xi
        last_input = shared_metadata.viscoelastic_prev_input
        alpha_row = shared_metadata.viscoelastic_alpha_row.unsqueeze(0)
        strength_row = shared_metadata.viscoelastic_strength_row.unsqueeze(0)

        # xi <- alpha * xi + (x - x_prev), evaluated as ((alpha * xi) + x) - x_prev.
        state.mul_(alpha_row)
        state.add_(data)
        state.sub_(last_input)
        # Remember the raw input before ``data`` is overwritten with the output.
        last_input.copy_(data)
        # output = x + strength * xi
        data.addcmul_(state, strength_row)
+ probe_local_pos : array-like[array-like[float, float, float]] or shape ``(M, N, 3)`` grid + Probe positions in link-local frame. Either a flat ``(N, 3)`` set or a 2D grid ``(M, N, 3)``; the + ``read()`` output is reshaped back to match this layout. + probe_radius : float | array-like[float] or shape ``(M, N)`` grid + Probe sensing radius in meters. A scalar is shared by every probe; an array (or grid) must match the + layout of ``probe_local_pos``. probe_radius_noise : float Additive radius noise in meters used by kernels whose measured branch depends on effective probe radius. - debug_probe_color : array-like[float, float, float, float] - RGBA color for inactive debug probe spheres. + debug_probe_color : array-like[float, float, float] + RGB color for debug probe spheres (no alpha; the center sphere is drawn opaque and the outer sphere uses + ``debug_probe_sphere_opacity``). + debug_probe_center_radius : float + Radius in meters of the small opaque marker sphere drawn at each probe position. + debug_probe_sphere_opacity : float + Alpha (0..1) of the outer translucent sphere drawn at each probe's sensing radius. Set to ``0.0`` to skip. 
""" - probe_local_pos: Vec3FArrayType = ((0.0, 0.0, 0.0),) - probe_radius: PositiveFArrayType | PositiveFloat = 0.01 + probe_local_pos: Vec3FArrayType | Vec3FGridType = ((0.0, 0.0, 0.0),) + probe_radius: PositiveFloat | PositiveFArrayType | PositiveFGridType = 0.01 probe_radius_noise: NonNegativeFloat = 0.0 - debug_probe_color: UnitIntervalVec4Type = (0.2, 0.6, 1.0, 0.6) + debug_probe_color: UnitIntervalVec3Type = (0.2, 0.4, 1.0) + debug_probe_center_radius: PositiveFloat = 0.0008 + debug_probe_sphere_opacity: UnitInterval = 0.3 def model_post_init(self, context: Any) -> None: super().model_post_init(context) - n_probes = np.array(self.probe_local_pos).reshape(-1, 3).shape[0] - _check_len_match(self.probe_radius, n_probes, "probe_radius", "probe_local_pos") + n_probes = int(np.prod(np.asarray(self.probe_local_pos).shape[:-1])) + if isinstance(self.probe_radius, Sequence): + if np.asarray(self.probe_radius).size != n_probes: + gs.raise_exception( + f"probe_radius shape {np.asarray(self.probe_radius).shape} must contain " + f"{n_probes} entries to match probe_local_pos." + ) class ProbesWithNormalSensorOptionsMixin(ProbeSensorOptionsMixin[SensorT]): @@ -227,16 +245,16 @@ class ProbesWithNormalSensorOptionsMixin(ProbeSensorOptionsMixin[SensorT]): Probe options for sensors that also define one normal per probe, or one shared normal. 
""" - probe_local_normal: UnitVec3FArrayType | UnitVec3FType = (0.0, 0.0, 1.0) + probe_local_normal: UnitVec3FType | UnitVec3FArrayType | UnitVec3FGridType = (0.0, 0.0, 1.0) def model_post_init(self, context: Any) -> None: super().model_post_init(context) - n_probes = np.array(self.probe_local_pos).reshape(-1, 3).shape[0] - normals = np.array(self.probe_local_normal) - if normals.ndim > 1 and normals.reshape(-1, 3).shape[0] != n_probes: + n_probes = int(np.prod(np.asarray(self.probe_local_pos).shape[:-1])) + normals = np.asarray(self.probe_local_normal) + if normals.ndim > 1 and normals.size // 3 != n_probes: gs.raise_exception( - "probe_local_normal must be one normal or match probe_local_pos length. " - f"Got {normals.reshape(-1, 3).shape[0]} normals and {n_probes} probe positions." + "probe_local_normal must be one normal or contain one normal per probe. " + f"Got normal shape {normals.shape} for {n_probes} probes." ) @@ -492,18 +510,14 @@ class SurfaceDistanceProbe( Probe positions in link-local frame. One (x, y, z) per probe. probe_radius : float | array-like[float] Maximum sensing range in meters. When no mesh is within this distance, distance is clamped to the probe - radius and nearest points is the probe position. Default: 10.0. + radius and nearest points is the probe position. Default: 0.5. Also controls the outer debug sphere. track_link_idx : array-like[int] Global link indices (solver link space) whose mesh geoms are used for distance queries. - debug_sphere_radius: float, optional - The radius of each debug sphere drawn in the scene. Defaults to 0.008. 
def _validate_filler_probe_radius(probe_radius, sensor_name: str) -> None:
    """
    Validate a ``probe_radius`` that permits 0-valued (inactive padding for grid) entries.

    Parameters
    ----------
    probe_radius : float or array-like of float
        Scalar radius or any nested sequence of radii; it is converted to a float array for checking.
    sensor_name : str
        Name placed at the start of every error message.

    Notes
    -----
    Errors are reported through ``gs.raise_exception`` when any entry is NaN, when any entry is negative, or
    when no entry is strictly positive. Nothing is returned on success.
    """
    radii = np.atleast_1d(np.asarray(probe_radius, dtype=float))
    # NaN compares False against both bounds checked below, so without this explicit test a NaN entry would be
    # accepted whenever some other entry is positive.
    if np.any(np.isnan(radii)):
        gs.raise_exception(f"{sensor_name} probe_radius entries must not be NaN. Got {probe_radius}.")
    if np.any(radii < 0.0):
        gs.raise_exception(f"{sensor_name} probe_radius entries must be non-negative. Got {probe_radius}.")
    if not np.any(radii > 0.0):
        gs.raise_exception(f"{sensor_name} requires at least one positive probe_radius. Got {probe_radius}.")
+ probe_gain : float | array-like[float], optional + Per-taxel multiplicative gain applied to the measured-branch contact depth. Default ``1.0`` (no gain). Accepts + a scalar (applied to all probes) or an array matching the probe count. Force/torque scale as + ``gain**normal_exponent`` because the spring-damper sees the gained depth. + probe_gain_resample_range : (float, float), optional + If set, the per-probe gain is resampled uniformly in ``(low, high)`` on every ``scene.reset()``. Disables the + static ``probe_gain`` after the first reset. Default ``None`` (no resampling; gain stays at initial value). + dead_taxel_probability : float, optional + Per-probe Bernoulli probability that the taxel becomes dead on each ``scene.reset()``. Default ``0.0`` + (no dead taxels). When set, the intermediate-cache value for dead probes is overwritten by a fresh + per-channel uniform sample in ``dead_taxel_value_range`` at the hardware-imperfections stage; the GT branch + is untouched. + dead_taxel_value_range : (float, float), optional + Uniform range for the dead value sampled per channel on reset. Default ``(0.0, 0.0)``. """ - debug_contact_color: UnitIntervalVec4Type = (1.0, 0.2, 0.0, 0.8) + debug_contact_color: UnitIntervalVec3Type = (1.0, 0.2, 0.0) + + probe_gain: PositiveFArrayType | PositiveFloat = 1.0 + probe_gain_resample_range: PositiveVec2FType | None = None + dead_taxel_probability: NonNegativeFloat = 0.0 + dead_taxel_value_range: Vec2FType = (0.0, 0.0) + + def model_post_init(self, context: Any) -> None: + super().model_post_init(context) + n_probes = int(np.prod(np.asarray(self.probe_local_pos).shape[:-1])) + _check_len_match(self.probe_gain, n_probes, "probe_gain", "probe_local_pos") + + if self.probe_gain_resample_range is not None: + low, high = float(self.probe_gain_resample_range[0]), float(self.probe_gain_resample_range[1]) + if low > high: + gs.raise_exception(f"probe_gain_resample_range must satisfy low <= high. 
Got ({low}, {high}).") + if self.dead_taxel_probability > 1.0: + gs.raise_exception(f"dead_taxel_probability must be in [0, 1]. Got {self.dead_taxel_probability}.") + low, high = float(self.dead_taxel_value_range[0]), float(self.dead_taxel_value_range[1]) + if low > high: + gs.raise_exception(f"dead_taxel_value_range must satisfy low <= high. Got ({low}, {high}).") class PointCloudTactileSensorMixin(TactileProbeSensorOptionsMixin[SensorT]): @@ -86,23 +160,44 @@ class ContactProbe( RigidSensorOptionsMixin["ContactProbeSensor"], SimpleSensorOptions["ContactProbeSensor"], TactileProbeSensorOptionsMixin["ContactProbeSensor"], + ViscoelasticHysteresisOptionsMixin["ContactProbeSensor"], ): """ Returns boolean contact per probe based on the contact depth threshold. Parameters ---------- + probe_radius : float | array-like[float] or shape ``(M, N)`` grid + Probe sensing radius in meters. A scalar is shared by every probe; an array (or grid) must match the + layout of ``probe_local_pos``. Array entries of ``0`` mark inactive filler probes -- they always read + ``False`` and skip the SDF query -- so an irregular taxel set can be padded into a regular grid. contact_threshold: float - A probe is considered in contact if the penetration depth is greater than or equal to this threshold (meters). + Penetration depth (meters) at or above which a probe latches into contact. + release_threshold: float, optional + Penetration depth (meters) at or below which a latched probe releases (Schmitt-trigger hysteresis). Must be + ``<= contact_threshold``. Defaults to ``contact_threshold`` (no hysteresis). """ + # Permits 0-valued (inactive filler) entries; see _validate_filler_probe_radius. 
+ probe_radius: PositiveFloat | FArrayType | FGridType = 0.01 + contact_threshold: NonNegativeFloat = 0.0001 + release_threshold: NonNegativeFloat | None = None + + def model_post_init(self, context: Any) -> None: + super().model_post_init(context) + _validate_filler_probe_radius(self.probe_radius, "ContactProbe") + if self.release_threshold is not None and self.release_threshold > self.contact_threshold: + gs.raise_exception( + f"release_threshold ({self.release_threshold}) must be <= contact_threshold ({self.contact_threshold})." + ) class ContactDepthProbe( RigidSensorOptionsMixin["ContactDepthProbeSensor"], SimpleSensorOptions["ContactDepthProbeSensor"], TactileProbeSensorOptionsMixin["ContactDepthProbeSensor"], + ViscoelasticHysteresisOptionsMixin["ContactDepthProbeSensor"], ): """ Returns contact depth in meters per probe. @@ -114,6 +209,7 @@ class KinematicTaxel( SimpleSensorOptions["KinematicTaxelSensor"], TactileProbeSensorOptionsMixin["KinematicTaxelSensor"], ProbesWithNormalSensorOptionsMixin["KinematicTaxelSensor"], + ViscoelasticHysteresisOptionsMixin["KinematicTaxelSensor"], ): """ A tactile sensor which estimates force and torque per taxel by querying contact depth relative to given probe @@ -132,8 +228,16 @@ class KinematicTaxel( ---- If this sensor is attached to a fixed entity, it will not detect contacts with other fixed entities. + ``probe_local_pos`` may be either an arbitrary set of probes with shape ``(N, 3)`` or a grid-shaped set with shape + ``(M, N, 3)``. Regular planar grids enable FFT-based spatial crosstalk on the measured branch (see + ``crosstalk_strength``). A probe whose ``probe_radius`` is 0 is treated as an inactive filler -- it reads 0 + force/torque and is skipped -- so an irregular taxel set can be padded into a regular grid for FFT crosstalk. + Parameters ---------- + probe_radius : float | array-like[float] + Probe sensing radius in meters. A scalar is shared by every probe; an array must match the probe count. 
+ Array entries of 0 mark inactive filler probes (see the grid note above); at least one must be positive. normal_stiffness : float Stiffness for normal force estimation based on contact penetration depth and spring-damper model. normal_damping : float @@ -145,19 +249,38 @@ class KinematicTaxel( Coefficient for shear force estimation based on relative linear velocity of the probe and entity in contact. twist_scalar : float, optional Coefficient for twist torque estimation based on relative angular velocity of the probe and entity in contact. + crosstalk_strength : float, optional + Spatial crosstalk mixing fraction applied on the measured branch. ``0`` (default) disables; ``1`` is pure + Gaussian blur with sigma ``crosstalk_sigma``. Requires a validated regular grid layout for + ``probe_local_pos`` and ``crosstalk_sigma > 0``. + crosstalk_sigma : float, optional + Gaussian crosstalk standard deviation in meters (same units as ``probe_local_pos`` spacing). Must be > 0 + when ``crosstalk_strength > 0``. """ + # Permits 0-valued (inactive filler) entries; see _validate_filler_probe_radius. + probe_radius: PositiveFloat | FArrayType | FGridType = 0.01 + normal_stiffness: NonNegativeFloat = 1000.0 normal_damping: NonNegativeFloat = 1.0 normal_exponent: NonNegativeFloat = 1.0 shear_scalar: NonNegativeFloat = 1.0 twist_scalar: NonNegativeFloat = 1.0 + crosstalk_strength: NonNegativeFloat = 0.0 + crosstalk_sigma: NonNegativeFloat = 0.0 + def model_post_init(self, context: Any) -> None: super().model_post_init(context) + _validate_filler_probe_radius(self.probe_radius, "KinematicTaxel") if self.normal_exponent < 1.0: gs.raise_exception(f"normal_exponent must be greater than or equal to 1.0. Got {self.normal_exponent}.") + if self.crosstalk_strength > 0.0 and self.crosstalk_sigma <= 0.0: + gs.raise_exception( + f"crosstalk_sigma ({self.crosstalk_sigma}) must be > 0 when crosstalk_strength " + f"({self.crosstalk_strength}) > 0." 
+ ) class ElastomerTaxel( @@ -165,6 +288,7 @@ class ElastomerTaxel( SimpleSensorOptions["ElastomerTaxelSensor"], PointCloudTactileSensorMixin["ElastomerTaxelSensor"], ProbesWithNormalSensorOptionsMixin["ElastomerTaxelSensor"], + ViscoelasticHysteresisOptionsMixin["ElastomerTaxelSensor"], ): """ An elastomer tactile sensor that implements HydroShear-style marker displacement from Genesis SDF queries. @@ -175,7 +299,17 @@ class ElastomerTaxel( ---- ``probe_local_pos`` may be either an arbitrary set of probes with shape ``(N, 3)`` or a grid-shaped set with shape ``(M, N, 3)``. Regular planar grids with one shared normal use FFT acceleration for dilation; other layouts use the - direct dilation path. Shear is computed directly. + direct dilation path. Shear is computed directly. A probe whose ``probe_radius`` is 0 is treated as an inactive + filler -- it reads 0 and is excluded from dilation/shear -- so an irregular taxel set can be padded into a + regular grid for FFT acceleration. + + Note + ---- + ``probe_gain`` is applied to ElastomerTaxel as a post-step linear scale of the measured marker displacement + (the dilation kernel writes a single shared field for both branches). This is exact for the tangential + dilation and shear components but approximate for the normal dilation term, which scales as + ``depth**normal_exponent`` and would ideally scale as ``gain**normal_exponent`` rather than ``gain``. For + gains near 1 the error is small. Parameters ---------- @@ -184,18 +318,29 @@ class ElastomerTaxel( probe_local_normal : array-like[float, float, float] or array-like[array-like[float, float, float]] Unit direction(s) in link-local frame: one normal for all probes, or one normal per probe matching ``probe_local_pos``. + probe_radius : float | array-like[float] + Probe sensing radius in meters. A scalar is shared by every probe; an array must match the probe count. 
+ Array entries of 0 mark inactive filler probes (see the grid note above); at least one must be positive. track_link_idx : array-like[int] Global rigid link indices whose collision geometry is queried by SDF and whose mesh is sampled for shear. n_sample_points: int | array-like[int] Total surface samples split across ``track_link_idx``, or one count per tracked link. lambda_d: float - Exponential coefficient for dilation spread. + Gaussian falloff coefficient (in 1/m^2) for the dilation kernel ``exp(-lambda_d * r^2)`` that smears each + in-contact probe's normal/tangential bulge across its neighbors. Larger values give sharper, more localized + markers; smaller values smear the bulge across more probes. lambda_s: float - Exponential coefficient for shear spread from tracked surface points. + Gaussian falloff coefficient (in 1/m^2) for the shear kernel ``exp(-lambda_s * r^2)`` that spreads each + anchored tracked-surface point's tangential displacement to nearby probes. Larger values keep shear tightly + local to the contact patch; smaller values produce a softer, more diffuse shear response. dilate_scale: float Scalar gain applied to dilation displacement. shear_scale: float Scalar gain applied to shear displacement. + normal_exponent: float + Exponent of the penetration-depth power law for the normal (out-of-plane) marker dilation: the normal + bulge scales as ``depth ** normal_exponent``. Must be >= 1.0. Default ``2.0`` (the HydroShear quadratic + normal response). Tangential dilation and shear stay linear in depth regardless of this value. elastomer_contact_sdf_enter: float Positive margin on signed distance: a tracked surface point starts anchoring shear when its elastomer SDF value is below ``-elastomer_contact_sdf_enter``. @@ -210,21 +355,25 @@ class ElastomerTaxel( should represent the compliant contact surface. 
""" - probe_local_pos: Vec3FArrayType | Vec3FGridType = ((0.0, 0.0, 0.0),) - probe_local_normal: UnitVec3FArrayType | UnitVec3FGridType | UnitVec3FType = (0.0, 0.0, 1.0) + # Permits 0-valued (inactive filler) entries; see _validate_filler_probe_radius. + probe_radius: PositiveFloat | FArrayType | FGridType = 0.01 lambda_d: NonNegativeFloat = 700.0 lambda_s: NonNegativeFloat = 300.0 dilate_scale: NonNegativeFloat = 1.0 shear_scale: NonNegativeFloat = 1.0 + normal_exponent: NonNegativeFloat = 2.0 elastomer_contact_sdf_enter: NonNegativeFloat = 1e-5 elastomer_contact_sdf_exit: NonNegativeFloat = 1e-4 def model_post_init(self, context: Any) -> None: super().model_post_init(context) + _validate_filler_probe_radius(self.probe_radius, "ElastomerTaxel") if len(self.track_link_idx) == 0: gs.raise_exception("ElastomerTaxel requires at least one tracked link in track_link_idx.") + if self.normal_exponent < 1.0: + gs.raise_exception(f"normal_exponent must be greater than or equal to 1.0. Got {self.normal_exponent}.") class ProximityTaxel( @@ -232,6 +381,7 @@ class ProximityTaxel( SimpleSensorOptions["ProximityTaxelSensor"], PointCloudTactileSensorMixin["ProximityTaxelSensor"], ProbesWithNormalSensorOptionsMixin["ProximityTaxelSensor"], + ViscoelasticHysteresisOptionsMixin["ProximityTaxelSensor"], ): """ A tactile sensor which estimates force and torque per taxel from proximity to point clouds sampled on tracked diff --git a/genesis/typing.py b/genesis/typing.py index ec79a39f85..e852376f53 100644 --- a/genesis/typing.py +++ b/genesis/typing.py @@ -104,6 +104,10 @@ def __get_pydantic_core_schema__(cls, source_type: Any, handler: GetCoreSchemaHa UnitVec3FArrayType = Vec3FArrayType Vec3FLaxArrayType = Vec3FArrayType | Vec3FType UnitVec3FLaxArrayType = Vec3FLaxArrayType + FGridType = Sequence[Sequence[NumericType]] | np.ndarray + PositiveFGridType = FGridType + Vec3FGridType = Sequence[Sequence[Sequence[NumericType]]] | np.ndarray + UnitVec3FGridType = Vec3FGridType 
RotationMatrixType = Vec3FArrayType Matrix3x3Type = Sequence[Sequence[NumericType]] | np.ndarray Matrix4x4Type = Sequence[Sequence[NumericType]] | np.ndarray @@ -165,6 +169,10 @@ def __get_pydantic_core_schema__(cls, source_type: Any, handler: GetCoreSchemaHa StrArrayType = Annotated[tuple[str, ...], Field(strict=False)] Vec3FArrayType = Annotated[tuple[Vec3FType, ...], Field(min_length=1, strict=False)] UnitVec3FArrayType = Annotated[tuple[UnitVec3FType, ...], Field(min_length=1, strict=False)] + FGridType = Annotated[tuple[FArrayType, ...], Field(min_length=1, strict=False)] + PositiveFGridType = Annotated[tuple[PositiveFArrayType, ...], Field(min_length=1, strict=False)] + Vec3FGridType = Annotated[tuple[Vec3FArrayType, ...], Field(min_length=1, strict=False)] + UnitVec3FGridType = Annotated[tuple[UnitVec3FArrayType, ...], Field(min_length=1, strict=False)] Vec3FLaxArrayType = Annotated[ tuple[Vec3FType, ...], BeforeValidator(lambda v: v if is_sequence(v) and len(v) > 0 and is_sequence(v[0]) else (v,)), diff --git a/tests/test_sensors.py b/tests/test_sensors.py index 4e1c9043a1..1614012a3e 100644 --- a/tests/test_sensors.py +++ b/tests/test_sensors.py @@ -1682,11 +1682,6 @@ def test_surface_distance_sensor_box_sphere(show_viewer, tol, n_envs): ) -def _as_env_batch(data, n_envs: int) -> torch.Tensor: - data = torch.as_tensor(data, device=gs.device) - return data.unsqueeze(0) if n_envs == 0 else data - - # ------------------------------------------------------------------------------------------ # ----------------------------------- Tactile Sensors -------------------------------------- # ------------------------------------------------------------------------------------------ @@ -1702,6 +1697,7 @@ def test_kinematic_contact_probe_box_sphere_support(show_viewer, tol, n_envs): CONTACT_THRESHOLD = 0.002 STIFFNESS = 100.0 SPHERE_RADIUS = 0.1 + GAIN = 1.5 scene = gs.Scene( sim_options=gs.options.SimOptions( @@ -1758,23 +1754,43 @@ def 
test_kinematic_contact_probe_box_sphere_support(show_viewer, tol, n_envs): **common_kwargs, ) ) - depth_probe = scene.add_sensor(gs.sensors.ContactDepthProbe(**common_kwargs)) + depth_probe = scene.add_sensor( + gs.sensors.ContactDepthProbe( + **common_kwargs, + ), + ) noisy_radius_depth_probe = scene.add_sensor( gs.sensors.ContactDepthProbe( probe_radius_noise=0.25, **common_kwargs, ) ) - taxel = scene.add_sensor( - gs.sensors.KinematicTaxel( - probe_local_normal=probe_normals, - normal_stiffness=STIFFNESS, - normal_damping=0.0, - shear_scalar=0.0, - twist_scalar=0.0, + # probe_gain variants: depth/force should scale by the gain on the measured branch only. + gained_depth_probe = scene.add_sensor( + gs.sensors.ContactDepthProbe( + probe_gain=GAIN, **common_kwargs, ) ) + taxel_kwargs = dict( + probe_local_normal=probe_normals, + normal_stiffness=STIFFNESS, + normal_damping=0.0, + shear_scalar=0.0, + twist_scalar=0.0, + **common_kwargs, + ) + taxel = scene.add_sensor( + gs.sensors.KinematicTaxel( + **taxel_kwargs, + ), + ) + gained_taxel = scene.add_sensor( + gs.sensors.KinematicTaxel( + probe_gain=GAIN, + **taxel_kwargs, + ), + ) sphere_taxel = scene.add_sensor( gs.sensors.KinematicTaxel( entity_idx=sphere.idx, @@ -1792,13 +1808,13 @@ def test_kinematic_contact_probe_box_sphere_support(show_viewer, tol, n_envs): scene.build(n_envs=n_envs) scene.step() - depth = _as_env_batch(depth_probe.read_ground_truth(), n_envs) - contact = _as_env_batch(contact_probe.read_ground_truth(), n_envs) - force = _as_env_batch(taxel.read_ground_truth().force, n_envs) - torque = _as_env_batch(taxel.read_ground_truth().torque, n_envs) + depth = depth_probe.read_ground_truth() + contact = contact_probe.read_ground_truth() + force = taxel.read_ground_truth().force + torque = taxel.read_ground_truth().torque assert_equal(contact, depth > CONTACT_THRESHOLD) - assert _as_env_batch(noisy_radius_depth_probe.read(), n_envs).shape == depth.shape + assert noisy_radius_depth_probe.read().shape == 
depth.shape # Check that the box's bottom probe (idx 3) detects the ground. assert (depth[..., 3] > tol).all(), "Bottom probe should detect the ground." assert (force[..., 3, 2] > tol).all(), "Bottom taxel force should point upward." @@ -1811,15 +1827,25 @@ def test_kinematic_contact_probe_box_sphere_support(show_viewer, tol, n_envs): expected_normals = -torch.tensor(probe_normals, dtype=gs.tc_float, device=gs.device) assert_allclose(force, depth.unsqueeze(-1) * STIFFNESS * expected_normals, tol=tol) + # probe_gain scales the measured branch only; GT is untouched. normal_exponent defaults to 1, so the measured + # force is linear in the gained depth and scales by the same factor. + gained_depth = gained_depth_probe.read() + gained_force = gained_taxel.read().force + assert (depth[..., 3] > tol).all() # sanity: the bottom probe is in contact + assert_allclose(gained_depth[..., 3], depth[..., 3] * GAIN, tol=tol) + assert_allclose(gained_depth_probe.read_ground_truth(), depth, tol=gs.EPS) + assert_allclose(gained_force[..., 3, :], force[..., 3, :] * GAIN, tol=tol) + assert_allclose(gained_taxel.read_ground_truth().force, force, tol=gs.EPS) + # Now position the sphere to penetrate the top of the box. box_top_z = BOX_SIZE - PENETRATION sphere.set_pos((0.0, 0.0, box_top_z + SPHERE_RADIUS - PENETRATION)) scene.step() - depth = _as_env_batch(depth_probe.read_ground_truth(), n_envs) - contact = _as_env_batch(contact_probe.read_ground_truth(), n_envs) - force = _as_env_batch(taxel.read_ground_truth().force, n_envs) - sphere_force = _as_env_batch(sphere_taxel.read_ground_truth().force, n_envs) + depth = depth_probe.read_ground_truth() + contact = contact_probe.read_ground_truth() + force = taxel.read_ground_truth().force + sphere_force = sphere_taxel.read_ground_truth().force assert_equal(contact, depth > CONTACT_THRESHOLD) assert (depth[..., 0] > tol).all(), "Top center probe should detect the sphere." 
@@ -1834,6 +1860,377 @@ def test_kinematic_contact_probe_box_sphere_support(show_viewer, tol, n_envs): assert_allclose(sphere_taxel.read_ground_truth().force, 0.0, tol=gs.EPS) +@pytest.mark.required +def test_contact_probe_hysteresis(show_viewer): + """ContactProbe with release_threshold < contact_threshold latches like a Schmitt trigger. + + Depth-probe semantics: ``depth = probe_radius - sd(probe, geom)``. With the probe placed 0.05 m above the box + bottom (``PROBE_LOCAL_Z``) and the box descending into the ground plane, ``sd = probe.z`` and + ``depth = probe_radius - probe.z``, giving smooth control over the reported depth. + """ + n_envs = 0 + BOX_SIZE = 0.2 + # Place probe 0.05m above the box bottom; reported depth = probe_radius - probe.z. With probe_radius = 0.060, + # depth = 0.010 at zero penetration and grows linearly with penetration p. + PROBE_LOCAL_Z = -BOX_SIZE / 2 + 0.05 + PROBE_RADIUS = 0.060 + ENTER = 0.030 # triggered at p ≈ 0.020 + RELEASE = 0.015 # triggered at p ≈ 0.005 + + # box.z values; box.z = BOX_SIZE/2 - p gives penetration p. 
+ BOX_Z_OFF = 1.0 # well above plane → no contact → depth = 0 + BOX_Z_BELOW_RELEASE = 0.099 # p = 0.001 → depth = 0.011 (< RELEASE) + BOX_Z_IN_BAND = 0.090 # p = 0.010 → depth = 0.020 (RELEASE < d < ENTER) + BOX_Z_ABOVE_ENTER = 0.070 # p = 0.030 → depth = 0.040 (> ENTER) + + scene = gs.Scene( + sim_options=gs.options.SimOptions(gravity=(0.0, 0.0, 0.0)), + profiling_options=gs.options.ProfilingOptions(show_FPS=False), + show_viewer=show_viewer, + ) + scene.add_entity(gs.morphs.Plane()) + box = scene.add_entity( + gs.morphs.Box( + size=(BOX_SIZE, BOX_SIZE, BOX_SIZE), + pos=(0.0, 0.0, BOX_Z_OFF), + fixed=False, + ), + ) + + common = dict( + entity_idx=box.idx, + probe_local_pos=((0.0, 0.0, PROBE_LOCAL_Z),), + probe_radius=PROBE_RADIUS, + draw_debug=show_viewer, + ) + hyst_probe = scene.add_sensor( + gs.sensors.ContactProbe( + contact_threshold=ENTER, + release_threshold=RELEASE, + **common, + ), + ) + plain_probe = scene.add_sensor( + gs.sensors.ContactProbe( + contact_threshold=ENTER, + **common, + ), + ) + + scene.build(n_envs=n_envs) + + def step_at(box_z): + box.set_pos((0.0, 0.0, box_z)) + scene.step() + h = hyst_probe.read_ground_truth() + p = plain_probe.read_ground_truth() + return h.reshape(-1), p.reshape(-1) + + # 1. No contact. + h, p = step_at(BOX_Z_OFF) + assert not h.any() and not p.any() + + # 2. Depth in band before any latch: both False (not latched). + h, p = step_at(BOX_Z_IN_BAND) + assert not h.any() and not p.any() + + # 3. Depth above enter: both latch True. + h, p = step_at(BOX_Z_ABOVE_ENTER) + assert h.all() and p.all() + + # 4. Lift to band: hyst stays latched, plain releases (depth < enter). + h, p = step_at(BOX_Z_IN_BAND) + assert h.all() and not p.any() + + # 5. Lift to below release: hyst clears. + h, p = step_at(BOX_Z_BELOW_RELEASE) + assert not h.any() and not p.any() + + # 6. Back into band: still False (not latched). + h, p = step_at(BOX_Z_IN_BAND) + assert not h.any() and not p.any() + + # 7. 
Reset clears latch even if depth is in band. + step_at(BOX_Z_ABOVE_ENTER) + scene.reset() + h, p = step_at(BOX_Z_IN_BAND) + assert not h.any() and not p.any() + + +@pytest.mark.required +def test_contact_depth_probe_viscoelastic_hysteresis(show_viewer): + """ContactDepthProbe with hysteresis_strength > 0 overshoots GT after a step input and relaxes back to it. + + Single-Maxwell model with equilibrium gain 1: ``output = x + strength * xi``, ``xi`` decays with time constant + ``tau``. After a step from 0 to D, measured value jumps to D*(1+strength) and decays back to D. GT is untouched. + """ + n_envs = 0 + BOX_SIZE = 0.2 + PROBE_LOCAL_Z = -BOX_SIZE / 2 + 0.05 + PROBE_RADIUS = 0.060 + STRENGTH = 0.5 + DT = 0.01 + TAU = 0.05 # alpha = exp(-dt/tau) ≈ 0.819 + ALPHA = np.exp(-DT / TAU) + + BOX_Z_OFF = 1.0 + BOX_Z_ON = 0.080 # p = 0.020, depth = 0.030 in steady state + + scene = gs.Scene( + sim_options=gs.options.SimOptions(gravity=(0.0, 0.0, 0.0), dt=DT), + profiling_options=gs.options.ProfilingOptions(show_FPS=False), + show_viewer=show_viewer, + ) + scene.add_entity(gs.morphs.Plane()) + box = scene.add_entity( + gs.morphs.Box( + size=(BOX_SIZE, BOX_SIZE, BOX_SIZE), + pos=(0.0, 0.0, BOX_Z_OFF), + fixed=False, + ), + ) + common = dict( + entity_idx=box.idx, + probe_local_pos=((0.0, 0.0, PROBE_LOCAL_Z),), + probe_radius=PROBE_RADIUS, + draw_debug=show_viewer, + ) + hyst = scene.add_sensor( + gs.sensors.ContactDepthProbe( + hysteresis_strength=STRENGTH, + hysteresis_tau=TAU, + **common, + ), + ) + plain = scene.add_sensor( + gs.sensors.ContactDepthProbe( + **common, + ), + ) + + scene.build(n_envs=n_envs) + + def step_at(z): + box.set_pos((0.0, 0.0, z)) + scene.step() + return ( + hyst.read().reshape(-1), + hyst.read_ground_truth().reshape(-1), + plain.read().reshape(-1), + ) + + # Step 1: no contact. All zero. 
+ h_m, h_gt, p_m = step_at(BOX_Z_OFF) + assert_allclose(h_m, 0.0, tol=gs.EPS) + assert_allclose(h_gt, 0.0, tol=gs.EPS) + assert_allclose(p_m, 0.0, tol=gs.EPS) + + # Step 2: jump to BOX_Z_ON. GT should equal plain measured (both = D). Hyst measured = D*(1+strength). + h_m, h_gt, p_m = step_at(BOX_Z_ON) + D = float(h_gt[0].item()) + assert D > 0.02 # sanity + assert_allclose(p_m, D, tol=1e-5) + assert_allclose(h_m, D * (1.0 + STRENGTH), tol=1e-4) + + # Subsequent steps at the same depth: xi decays by ALPHA each step, measured = D + strength * D * ALPHA^k. + for k in range(1, 5): + h_m, h_gt, p_m = step_at(BOX_Z_ON) + assert_allclose(h_gt, D, tol=1e-5) # GT untouched. + assert_allclose(p_m, D, tol=1e-5) # Plain untouched. + expected = D * (1.0 + STRENGTH * (ALPHA**k)) + assert_allclose(h_m, expected, tol=1e-4) + + # Reset clears xi: a single step at depth D should overshoot exactly like step 2. + scene.reset() + box.set_pos((0.0, 0.0, BOX_Z_OFF)) + scene.step() + h_m, h_gt, p_m = step_at(BOX_Z_ON) + assert_allclose(h_m, D * (1.0 + STRENGTH), tol=1e-4) + + +@pytest.mark.required +def test_probe_gain_and_dead_resample(show_viewer): + """probe_gain_resample_range and dead_taxel_probability redraw their per-(env, probe) randomness on every + scene.reset(): the gained sensor's measured depth carries a per-env gain in range, and the dead sensor's + measured value is overwritten by a per-env sample in range. 
GT is untouched, and both redraw on the next + reset.""" + BOX_SIZE = 0.2 + PROBE_LOCAL_Z = -BOX_SIZE / 2 + 0.05 + PROBE_RADIUS = 0.060 + GAIN_LOW, GAIN_HIGH = 0.5, 1.5 + DEAD_LOW, DEAD_HIGH = 0.123, 0.456 + BOX_Z = 0.080 # box descends into the plane so the real contact depth is non-zero + N_ENVS = 8 + + scene = gs.Scene( + sim_options=gs.options.SimOptions(gravity=(0.0, 0.0, 0.0)), + profiling_options=gs.options.ProfilingOptions(show_FPS=False), + show_viewer=show_viewer, + ) + scene.add_entity(gs.morphs.Plane()) + box = scene.add_entity( + gs.morphs.Box( + size=(BOX_SIZE, BOX_SIZE, BOX_SIZE), + pos=(0.0, 0.0, 1.0), + fixed=False, + ), + ) + common = dict( + entity_idx=box.idx, + probe_local_pos=((0.0, 0.0, PROBE_LOCAL_Z),), + probe_radius=PROBE_RADIUS, + draw_debug=show_viewer, + ) + gain_sensor = scene.add_sensor( + gs.sensors.ContactDepthProbe( + probe_gain_resample_range=(GAIN_LOW, GAIN_HIGH), + **common, + ), + ) + dead_sensor = scene.add_sensor( + gs.sensors.ContactDepthProbe( + dead_taxel_probability=1.0, + dead_taxel_value_range=(DEAD_LOW, DEAD_HIGH), + **common, + ), + ) + + scene.build(n_envs=N_ENVS) + + def reset_step_read(): + scene.reset() # triggers the per-(env, probe) resample of gain and dead state + box.set_pos([[0.0, 0.0, BOX_Z]] * N_ENVS) + scene.step() + gains = (gain_sensor.read() / gain_sensor.read_ground_truth()).reshape(-1).cpu() + dead = dead_sensor.read().reshape(-1).cpu() + return gains, dead + + gains_a, dead_a = reset_step_read() + # Gain stays in range, dead values are overwritten in range, and both vary across the 8 envs. + assert torch.all((gains_a >= GAIN_LOW - 1e-5) & (gains_a <= GAIN_HIGH + 1e-5)) + assert torch.all((dead_a >= DEAD_LOW - 1e-5) & (dead_a <= DEAD_HIGH + 1e-5)) + assert gains_a.std().item() > 0.01 and dead_a.std().item() > 0.01 + # The dead sensor's GT is untouched -- it still reports the real (non-zero) contact depth. 
+ assert torch.all(dead_sensor.read_ground_truth().reshape(-1) > 0.0) + + # A second reset redraws both. + gains_b, dead_b = reset_step_read() + assert not torch.allclose(gains_a, gains_b, atol=1e-3) + assert not torch.allclose(dead_a, dead_b, atol=1e-3) + + +@pytest.mark.required +def test_kinematic_taxel_crosstalk(show_viewer): + """Crosstalk smears the measured force field across grid neighbors while leaving GT unchanged; total normal + force is preserved (DC of the normalized kernel is 1). Also checks ``crosstalk_strength=0`` is the exact + no-crosstalk path and that a grid layout matches a flat layout at the identical probe positions.""" + BOX_SIZE = 0.2 + PROBE_RADIUS = 0.02 + SPACING = 0.03 + SPHERE_RADIUS = 0.025 + BOX_BOTTOM_Z = 0.05 + CROSSTALK_STRENGTH = 0.6 + CROSSTALK_SIGMA = SPACING + + ny, nx = 5, 5 + grid_positions = np.zeros((ny, nx, 3), dtype=gs.np_float) + for iy in range(ny): + for ix in range(nx): + grid_positions[iy, ix] = ((ix - 2) * SPACING, (iy - 2) * SPACING, BOX_SIZE / 2) + + scene = gs.Scene( + sim_options=gs.options.SimOptions(gravity=(0.0, 0.0, 0.0)), + profiling_options=gs.options.ProfilingOptions(show_FPS=False), + show_viewer=show_viewer, + ) + box = scene.add_entity( + gs.morphs.Box(size=(BOX_SIZE, BOX_SIZE, BOX_SIZE), pos=(0.0, 0.0, BOX_BOTTOM_Z + BOX_SIZE / 2), fixed=True) + ) + sphere = scene.add_entity( + gs.morphs.Sphere( + radius=SPHERE_RADIUS, + pos=(0.0, 0.0, BOX_BOTTOM_Z + BOX_SIZE + SPHERE_RADIUS - 0.010), + fixed=False, + ) + ) + + common = dict( + entity_idx=box.idx, + probe_local_normal=(0.0, 0.0, 1.0), + probe_radius=PROBE_RADIUS, + normal_stiffness=100.0, + normal_damping=0.0, + shear_scalar=0.0, + twist_scalar=0.0, + ) + plain = scene.add_sensor( + gs.sensors.KinematicTaxel( + probe_local_pos=grid_positions.tolist(), + **common, + ), + ) + crosstalk = scene.add_sensor( + gs.sensors.KinematicTaxel( + probe_local_pos=grid_positions.tolist(), + crosstalk_strength=CROSSTALK_STRENGTH, + crosstalk_sigma=CROSSTALK_SIGMA, + 
**common, + ) + ) + # crosstalk_strength=0 must reproduce the no-crosstalk path exactly, even with a non-zero sigma. + crosstalk_off = scene.add_sensor( + gs.sensors.KinematicTaxel( + probe_local_pos=grid_positions.tolist(), crosstalk_strength=0.0, crosstalk_sigma=0.05, **common + ) + ) + # Same probes laid out flat: per-probe GT must match the grid layout. + flat = scene.add_sensor(gs.sensors.KinematicTaxel(probe_local_pos=grid_positions.reshape(-1, 3).tolist(), **common)) + + scene.build(n_envs=0) + sphere.set_pos((0.0, 0.0, BOX_BOTTOM_Z + BOX_SIZE + SPHERE_RADIUS - 0.010)) + scene.step() + + plain_meas_force = plain.read().force + crosstalk_meas_force = crosstalk.read().force + plain_gt_force = plain.read_ground_truth().force + crosstalk_gt_force = crosstalk.read_ground_truth().force + + # GT branch is untouched by crosstalk. + assert_allclose(crosstalk_gt_force, plain_gt_force, tol=gs.EPS) + + # Plain measured equals GT (no transforms enabled on plain sensor). + assert_allclose(plain_meas_force, plain_gt_force, tol=gs.EPS) + + plain_force_mag = torch.linalg.norm(plain_meas_force, dim=-1) + iy_c, ix_c = (plain_force_mag == plain_force_mag.max()).nonzero(as_tuple=False)[0].tolist() + assert (iy_c, ix_c) == (ny // 2, nx // 2) + + crosstalk_force_mag = torch.linalg.norm(crosstalk_meas_force, dim=-1) + # Center magnitude on crosstalk sensor is reduced vs plain (energy redistributed). + assert crosstalk_force_mag[iy_c, ix_c] < plain_force_mag[iy_c, ix_c] + # A probe outside the contact patch (2 spacings from center) was ~zero on plain; crosstalk leaks force there. + plain_far = plain_force_mag[0, 0].item() + crosstalk_far = crosstalk_force_mag[0, 0].item() + assert plain_far < 1e-4, f"far probe should be ~zero on plain sensor (got {plain_far})" + assert crosstalk_far > 1e-4, f"far probe should pick up crosstalk leakage (got {crosstalk_far})" + + # Total Fz across the grid is preserved up to Gaussian-tail leakage past the output slice boundary. 
+ plain_total_fz = plain_meas_force[..., 2].sum().item() + crosstalk_total_fz = crosstalk_meas_force[..., 2].sum().item() + assert np.isclose(plain_total_fz, crosstalk_total_fz, rtol=5e-2, atol=1e-5), ( + f"plain={plain_total_fz}, crosstalk={crosstalk_total_fz}" + ) + + # crosstalk_strength=0 is the exact no-crosstalk path (even with a non-zero sigma). + assert_allclose(crosstalk_off.read().force, plain_meas_force, tol=gs.EPS) + assert_allclose(crosstalk_off.read().torque, plain.read().torque, tol=gs.EPS) + + # A grid layout produces the same per-probe GT as a flat layout at the identical positions. + flat_gt = flat.read_ground_truth() + assert_allclose(plain_gt_force.reshape(-1, 3), flat_gt.force, tol=gs.EPS) + assert_allclose(plain.read_ground_truth().torque.reshape(-1, 3), flat_gt.torque, tol=gs.EPS) + + @pytest.mark.required @pytest.mark.parametrize("n_envs", [0, 2]) def test_elastomer_sensor_sphere_ground_dilate_shear(show_viewer, tol, n_envs): @@ -1845,6 +2242,7 @@ def test_elastomer_sensor_sphere_ground_dilate_shear(show_viewer, tol, n_envs): N_RINGS = 3 LATERAL_SHIFT = 0.01 SHEAR_SCALE = 100.0 + GAIN = 2.0 scene = gs.Scene( sim_options=gs.options.SimOptions( @@ -1912,26 +2310,40 @@ def test_elastomer_sensor_sphere_ground_dilate_shear(show_viewer, tol, n_envs): **sensor_kwargs, ) ) + # probe_gain variant: the measured marker displacement scales by the gain; GT is untouched. 
+ gained_sensor = scene.add_sensor( + gs.sensors.ElastomerTaxel( + dilate_scale=1.0, + shear_scale=0.0, + probe_gain=GAIN, + **sensor_kwargs, + ) + ) assert not dilate_sensor._is_grid and not dilate_sensor._use_grid_fft scene.build(n_envs=n_envs) scene.step() - dilate_data = _as_env_batch(dilate_sensor.read_ground_truth(), n_envs) - shear_data = _as_env_batch(shear_sensor.read_ground_truth(), n_envs) - combined_data = _as_env_batch(combined_sensor.read_ground_truth(), n_envs) + dilate_data = dilate_sensor.read_ground_truth() + shear_data = shear_sensor.read_ground_truth() + combined_data = combined_sensor.read_ground_truth() normal_projection = (dilate_data * normals).sum(dim=-1) assert (normal_projection[..., 0] > tol).all(), "Bottom marker should dilate along its outward normal." assert torch.linalg.norm(dilate_data, dim=-1).max() > tol assert_allclose(shear_data, 0.0, tol=tol) assert_allclose(combined_data, dilate_data, tol=tol) + gained_meas = gained_sensor.read() + gained_gt = gained_sensor.read_ground_truth() + assert torch.linalg.norm(gained_gt, dim=-1).max() > tol # sanity: in contact + assert_allclose(gained_meas, gained_gt * GAIN, tol=tol) + sphere.set_pos((LATERAL_SHIFT, 0.0, sphere_init_pos[2])) scene.step() - dilate_data = _as_env_batch(dilate_sensor.read_ground_truth(), n_envs) - shear_data = _as_env_batch(shear_sensor.read_ground_truth(), n_envs) - combined_data = _as_env_batch(combined_sensor.read_ground_truth(), n_envs) + dilate_data = dilate_sensor.read_ground_truth() + shear_data = shear_sensor.read_ground_truth() + combined_data = combined_sensor.read_ground_truth() shear_normal_projection = (shear_data * normals).sum(dim=-1) shear_tangent = shear_data - shear_normal_projection.unsqueeze(-1) * normals assert torch.linalg.norm(shear_tangent, dim=-1).max() > tol @@ -2025,6 +2437,25 @@ def test_elastomer_sensor_grid_box_sphere(show_viewer, tol, n_envs): **sensor_kwargs, ) ) + # A non-default normal_exponent (cubic instead of the default quadratic 
normal dilation), one per path. + cubic_grid_sensor = scene.add_sensor( + gs.sensors.ElastomerTaxel( + probe_local_pos=probe_local_pos, + dilate_scale=1.0, + shear_scale=0.0, + normal_exponent=3.0, + **sensor_kwargs, + ) + ) + cubic_flat_sensor = scene.add_sensor( + gs.sensors.ElastomerTaxel( + probe_local_pos=probe_local_pos.reshape(-1, 3), + dilate_scale=1.0, + shear_scale=0.0, + normal_exponent=3.0, + **sensor_kwargs, + ) + ) assert elastomer_grid_sensor._is_grid and elastomer_grid_sensor._use_grid_fft assert not elastomer_sensor._is_grid and not elastomer_sensor._use_grid_fft assert_allclose(elastomer_sensor.probe_local_pos, elastomer_grid_sensor.probe_local_pos, tol=gs.EPS) @@ -2040,6 +2471,13 @@ def test_elastomer_sensor_grid_box_sphere(show_viewer, tol, n_envs): assert_allclose(shear_sensor.read_ground_truth(), 0.0, tol=tol) assert_allclose(combined_sensor.read_ground_truth(), flat_data, tol=tol) + # normal_exponent reshapes only the out-of-plane channel: the grid-FFT and direct paths still agree, and the + # cubic-normal response differs from the default quadratic one (sub-unit depths here, so depth**3 < depth**2). + cubic_data = cubic_grid_sensor.read_ground_truth() + assert_allclose(cubic_flat_sensor.read_ground_truth(), cubic_data, tol=tol) + cubic_diff = torch.as_tensor(cubic_data, device=gs.device) - torch.as_tensor(grid_data, device=gs.device) + assert torch.linalg.norm(cubic_diff, dim=-1).max() > tol, "normal_exponent=3 should change the dilation output" + # Test combined displacement: dilate + shear contributions should add when the box slides laterally. 
box.set_pos((LATERAL_SHIFT, 0.0, SPHERE_RADIUS * 2 + BOX_SIZE / 2 - PENETRATION)) scene.step() @@ -2056,12 +2494,140 @@ def test_elastomer_sensor_grid_box_sphere(show_viewer, tol, n_envs): assert_equal(combined_sensor.read_ground_truth(), 0.0, err_msg="ElastomerTaxel should be zero in air.") +@pytest.mark.required +@pytest.mark.parametrize("n_envs", [0, 2]) +def test_tactile_filler_probes_radius_zero(show_viewer, tol, n_envs): + """probe_radius == 0 marks inactive filler probes on ElastomerTaxel / KinematicTaxel: they read 0 and are + excluded from dilation / force, letting an irregular taxel set be padded into a regular grid for FFT.""" + SPHERE_RADIUS = 0.1 + BOX_SIZE = 0.1 + PENETRATION = 0.01 + GRID = (8, 8) + RADIUS = 0.02 + + scene = gs.Scene( + sim_options=gs.options.SimOptions( + gravity=(0.0, 0.0, 0.0), + ), + profiling_options=gs.options.ProfilingOptions( + show_FPS=False, + ), + show_viewer=show_viewer, + ) + sphere = scene.add_entity( + gs.morphs.Sphere( + radius=SPHERE_RADIUS, + pos=(0.0, 0.0, SPHERE_RADIUS), + fixed=True, + ) + ) + box = scene.add_entity( + gs.morphs.Box( + size=(BOX_SIZE, BOX_SIZE, BOX_SIZE), + pos=(0.0, 0.0, SPHERE_RADIUS * 2 + BOX_SIZE / 2 - PENETRATION), + fixed=False, + ) + ) + grid_pos = gu.generate_grid_points_on_plane( + lo=(-BOX_SIZE / 2, -BOX_SIZE / 2, -BOX_SIZE / 2), + hi=(BOX_SIZE / 2, BOX_SIZE / 2, -BOX_SIZE / 2), + normal=(0.0, 0.0, -1.0), + nx=GRID[0], + ny=GRID[1], + ) + flat_pos = grid_pos.reshape(-1, 3) + # Mark a 2x2 corner block (flat indices iy*nx+ix) as inactive fillers; the rest sense normally. 
+ filler_idx = [0, 1, GRID[0], GRID[0] + 1] + radii = np.full(flat_pos.shape[0], RADIUS) + radii[filler_idx] = 0.0 + active_mask = radii > 0.0 + + elastomer_kwargs = dict( + entity_idx=box.idx, + probe_local_normal=(0.0, 0.0, -1.0), + track_link_idx=(sphere.base_link_idx,), + n_sample_points=600, + lambda_s=0.0, + shear_scale=0.0, + dilate_scale=1.0, + draw_debug=show_viewer, + ) + elastomer_grid = scene.add_sensor( + gs.sensors.ElastomerTaxel( + probe_local_pos=grid_pos, + probe_radius=radii.tolist(), + **elastomer_kwargs, + ) + ) + elastomer_active = scene.add_sensor( + gs.sensors.ElastomerTaxel( + probe_local_pos=flat_pos[active_mask], + probe_radius=RADIUS, + **elastomer_kwargs, + ) + ) + kinematic_kwargs = dict( + entity_idx=box.idx, + probe_local_normal=(0.0, 0.0, -1.0), + normal_stiffness=500.0, + draw_debug=show_viewer, + ) + kinematic_grid = scene.add_sensor( + gs.sensors.KinematicTaxel( + probe_local_pos=grid_pos, + probe_radius=radii.tolist(), + **kinematic_kwargs, + ) + ) + kinematic_full = scene.add_sensor( + gs.sensors.KinematicTaxel( + probe_local_pos=grid_pos, + probe_radius=RADIUS, + **kinematic_kwargs, + ) + ) + kinematic_crosstalk = scene.add_sensor( + gs.sensors.KinematicTaxel( + probe_local_pos=grid_pos, + probe_radius=radii.tolist(), + crosstalk_strength=1.0, + crosstalk_sigma=BOX_SIZE / GRID[0], + **kinematic_kwargs, + ) + ) + assert elastomer_grid._use_grid_fft + scene.build(n_envs=n_envs) + scene.step() + + # ElastomerTaxel (FFT dilation): filler probes read 0; active probes match a sensor built from only the + # active probes -- the fillers contribute no dilation, so the active readings are unchanged by their padding. + # The grid-input sensor reports (..., ny, nx, 3); flatten the grid axes for filler-index comparison. 
+ grid_data = torch.as_tensor(elastomer_grid.read_ground_truth(), device=gs.device).flatten(-3, -2) + active_data = torch.as_tensor(elastomer_active.read_ground_truth(), device=gs.device) + assert torch.linalg.norm(grid_data, dim=-1).max() > tol, "active elastomer probes should detect contact" + assert_allclose(grid_data[..., filler_idx, :], 0.0, tol=gs.EPS) + assert_allclose(grid_data[..., active_mask, :], active_data, tol=tol) + + # KinematicTaxel: filler probes read 0 force; active probes match the all-active grid (per-probe force). + # KinematicTaxel reports a grid-shaped (..., ny, nx, 3) reading; flatten the grid axes to the flat index. + kin_grid = torch.as_tensor(kinematic_grid.read().force, device=gs.device).flatten(-3, -2) + kin_full = torch.as_tensor(kinematic_full.read().force, device=gs.device).flatten(-3, -2) + assert torch.linalg.norm(kin_full, dim=-1).max() > tol, "active kinematic probes should detect contact" + assert_allclose(kin_grid[..., filler_idx, :], 0.0, tol=gs.EPS) + assert_allclose(kin_grid[..., active_mask, :], kin_full[..., active_mask, :], tol=tol) + + # KinematicTaxel FFT crosstalk smears neighbour force, but filler probes are still masked back to 0. + kin_xt = torch.as_tensor(kinematic_crosstalk.read().force, device=gs.device).flatten(-3, -2) + assert_allclose(kin_xt[..., filler_idx, :], 0.0, tol=gs.EPS) + + @pytest.mark.required @pytest.mark.parametrize("n_envs", [0, 2]) def test_proximity_sensor_box_on_box(show_viewer, tol, n_envs): """ProximityTaxel reports a nonzero point-cloud force in contact and near-zero force in air.""" BOX_SIZE = 0.2 PENETRATION = 0.01 + GAIN = 1.5 scene = gs.Scene( sim_options=gs.options.SimOptions( @@ -2100,16 +2666,37 @@ def test_proximity_sensor_box_on_box(show_viewer, tol, n_envs): draw_debug=show_viewer, ) ) + # probe_gain variant (no radius noise so the measured branch is deterministic): force is linear in the summed + # penetration, so the measured force scales by the gain while GT is untouched. 
+ gained_sensor = scene.add_sensor( + gs.sensors.ProximityTaxel( + entity_idx=taxel_box.idx, + probe_local_pos=((0.0, 0.0, -BOX_SIZE / 2), (BOX_SIZE / 4, 0.0, -BOX_SIZE / 2)), + probe_local_normal=(0.0, 0.0, -1.0), + probe_radius=0.06, + probe_gain=GAIN, + track_link_idx=(support.base_link_idx,), + n_sample_points=600, + stiffness=100.0, + shear_coupling=0.0, + draw_debug=show_viewer, + ) + ) scene.build(n_envs=n_envs) scene.step() - force_norm = torch.linalg.norm(_as_env_batch(sensor.read_ground_truth().force, n_envs), dim=-1) + force_norm = torch.linalg.norm(sensor.read_ground_truth().force, dim=-1) assert (force_norm > tol).all() + gained_meas = gained_sensor.read().force + gained_gt = gained_sensor.read_ground_truth().force + assert (torch.linalg.norm(gained_gt, dim=-1) > tol).all() # sanity: in contact + assert_allclose(gained_meas, gained_gt * GAIN, tol=tol) + taxel_box.set_pos((0.0, 0.0, BOX_SIZE + BOX_SIZE / 2 + 0.2)) scene.step() - force_norm = torch.linalg.norm(_as_env_batch(sensor.read_ground_truth().force, n_envs), dim=-1) + force_norm = torch.linalg.norm(sensor.read_ground_truth().force, dim=-1) assert_allclose(force_norm, 0.0, tol=gs.EPS) From 7b4765489aae6f40c5c4302a969b281585f205c5 Mon Sep 17 00:00:00 2001 From: Trinity Chung Date: Sun, 24 May 2026 16:59:48 -0400 Subject: [PATCH 2/7] fix elastomertaxel is_grid --- genesis/engine/sensors/point_cloud_tactile.py | 4 ++-- tests/test_sensors.py | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/genesis/engine/sensors/point_cloud_tactile.py b/genesis/engine/sensors/point_cloud_tactile.py index d7f66a51f5..00739408d5 100644 --- a/genesis/engine/sensors/point_cloud_tactile.py +++ b/genesis/engine/sensors/point_cloud_tactile.py @@ -1505,12 +1505,12 @@ class ElastomerTaxelSensor( def __init__(self, sensor_options: ElastomerTaxelSensorOptions, sensor_idx: int, sensor_manager: "SensorManager"): super().__init__(sensor_options, sensor_idx, sensor_manager) # FFT-grid eligibility check 
(flat pos/normals are already populated by the base mixins). - is_grid = len(self._probe_layout_shape) == 2 + self._is_grid = len(self._probe_layout_shape) == 2 _, _, self._use_grid_fft, grid_normal, grid_tangent_u, grid_tangent_v, grid_spacing = ( normalize_grid_probe_layout( np.asarray(sensor_options.probe_local_pos, dtype=gs.np_float), np.asarray(sensor_options.probe_local_normal, dtype=gs.np_float), - is_grid, + self._is_grid, ) ) self._grid_normal = torch.tensor(grid_normal, dtype=gs.tc_float, device=gs.device) diff --git a/tests/test_sensors.py b/tests/test_sensors.py index 1614012a3e..bf4edf26a2 100644 --- a/tests/test_sensors.py +++ b/tests/test_sensors.py @@ -2464,18 +2464,19 @@ def test_elastomer_sensor_grid_box_sphere(show_viewer, tol, n_envs): scene.step() # Test dilate displacement: grid sensor should match the flat-layout sensor and detect contact magnitude. - grid_data = elastomer_grid_sensor.read_ground_truth() + # The grid-input sensor reports (..., ny, nx, 3); flatten the grid axes for comparison with the flat sensor. + grid_data = torch.as_tensor(elastomer_grid_sensor.read_ground_truth(), device=gs.device).flatten(-3, -2) flat_data = elastomer_sensor.read_ground_truth() assert_allclose(flat_data, grid_data, tol=tol) - assert torch.linalg.norm(torch.as_tensor(grid_data, device=gs.device), dim=-1).max() > tol + assert torch.linalg.norm(grid_data, dim=-1).max() > tol assert_allclose(shear_sensor.read_ground_truth(), 0.0, tol=tol) assert_allclose(combined_sensor.read_ground_truth(), flat_data, tol=tol) # normal_exponent reshapes only the out-of-plane channel: the grid-FFT and direct paths still agree, and the # cubic-normal response differs from the default quadratic one (sub-unit depths here, so depth**3 < depth**2). 
- cubic_data = cubic_grid_sensor.read_ground_truth() + cubic_data = torch.as_tensor(cubic_grid_sensor.read_ground_truth(), device=gs.device).flatten(-3, -2) assert_allclose(cubic_flat_sensor.read_ground_truth(), cubic_data, tol=tol) - cubic_diff = torch.as_tensor(cubic_data, device=gs.device) - torch.as_tensor(grid_data, device=gs.device) + cubic_diff = cubic_data - grid_data assert torch.linalg.norm(cubic_diff, dim=-1).max() > tol, "normal_exponent=3 should change the dilation output" # Test combined displacement: dilate + shear contributions should add when the box slides laterally. From df0b9673d5a114ad16b634a3a452e54bf54149e6 Mon Sep 17 00:00:00 2001 From: Trinity Chung Date: Mon, 25 May 2026 01:56:26 -0400 Subject: [PATCH 3/7] allow nonexact grid but with warning --- genesis/engine/sensors/kinematic_tactile.py | 24 ++++--- genesis/engine/sensors/point_cloud_tactile.py | 12 +++- genesis/engine/sensors/tactile_shared.py | 62 ++++++++++++------- 3 files changed, 66 insertions(+), 32 deletions(-) diff --git a/genesis/engine/sensors/kinematic_tactile.py b/genesis/engine/sensors/kinematic_tactile.py index cc04228871..ade9304931 100644 --- a/genesis/engine/sensors/kinematic_tactile.py +++ b/genesis/engine/sensors/kinematic_tactile.py @@ -775,10 +775,11 @@ class KinematicTaxelSensor( def __init__(self, sensor_options: KinematicTaxelOptions, sensor_idx: int, sensor_manager: "SensorManager"): super().__init__(sensor_options, sensor_idx, sensor_manager) - # FFT-grid eligibility: validates that a 2D layout has uniform spacing/normals/orthogonal tangents. + # FFT-grid eligibility: requires a 2D probe layout with non-degenerate spacing. Strict regularity + # (uniform normals, orthogonal tangents, exact rectangle) is reported separately as a warning. # Flat pos/normals are already populated by ProbeSensorMixin / ProbesWithNormalSensorMixin. 
is_grid = len(self._probe_layout_shape) == 2 - _, _, self._use_grid_fft, grid_normal, grid_tangent_u, grid_tangent_v, grid_spacing = ( + _, _, self._use_grid_fft, is_grid_regular, grid_normal, grid_tangent_u, grid_tangent_v, grid_spacing = ( normalize_grid_probe_layout( np.asarray(sensor_options.probe_local_pos, dtype=gs.np_float), np.asarray(sensor_options.probe_local_normal, dtype=gs.np_float), @@ -790,11 +791,18 @@ def __init__(self, sensor_options: KinematicTaxelOptions, sensor_idx: int, senso self._grid_tangent_v = torch.tensor(grid_tangent_v, dtype=gs.tc_float, device=gs.device) self._grid_spacing = torch.tensor(grid_spacing, dtype=gs.tc_float, device=gs.device) - if self._options.crosstalk_strength > 0.0 and not self._use_grid_fft: - gs.raise_exception( - "KinematicTaxel crosstalk requires a validated grid layout (probe_local_pos shape (ny, nx, 3) with " - "uniform spacing, uniform normals, and orthogonal tangents)." - ) + if self._options.crosstalk_strength > 0.0: + if not self._use_grid_fft: + gs.raise_exception( + "KinematicTaxel crosstalk requires a 2D grid-shaped probe_local_pos (shape (ny, nx, 3) with " + f"ny, nx >= 2 and non-degenerate spacing); got shape {tuple(self._probe_layout_shape)}." + ) + if not is_grid_regular: + gs.logger.warning( + "KinematicTaxel crosstalk grid is not strictly regular (uniform spacing, uniform normals, " + "orthogonal tangents); FFT crosstalk will use averaged spacing and normal as a best-fit " + "approximation." 
+ ) def build(self): super().build() @@ -815,7 +823,7 @@ def build(self): self._shared_metadata.twist_scalar, float(self._options.twist_scalar), expand=(1,) ) - if self._options.crosstalk_strength > 0.0: + if self._options.crosstalk_strength > 0.0 and self._use_grid_fft: self._register_crosstalk() def _get_return_format(self) -> tuple[tuple[int, ...], ...]: diff --git a/genesis/engine/sensors/point_cloud_tactile.py b/genesis/engine/sensors/point_cloud_tactile.py index 00739408d5..e20908d375 100644 --- a/genesis/engine/sensors/point_cloud_tactile.py +++ b/genesis/engine/sensors/point_cloud_tactile.py @@ -1504,9 +1504,11 @@ class ElastomerTaxelSensor( ): def __init__(self, sensor_options: ElastomerTaxelSensorOptions, sensor_idx: int, sensor_manager: "SensorManager"): super().__init__(sensor_options, sensor_idx, sensor_manager) - # FFT-grid eligibility check (flat pos/normals are already populated by the base mixins). + # FFT-grid eligibility check (flat pos/normals are already populated by the base mixins). 2D layouts with + # non-degenerate spacing use the FFT dilation path; grids that are not strictly regular still take that + # path with averaged metadata and only emit a warning.
self._is_grid = len(self._probe_layout_shape) == 2 - _, _, self._use_grid_fft, grid_normal, grid_tangent_u, grid_tangent_v, grid_spacing = ( + _, _, self._use_grid_fft, is_grid_regular, grid_normal, grid_tangent_u, grid_tangent_v, grid_spacing = ( normalize_grid_probe_layout( np.asarray(sensor_options.probe_local_pos, dtype=gs.np_float), np.asarray(sensor_options.probe_local_normal, dtype=gs.np_float), @@ -1518,6 +1520,12 @@ def __init__(self, sensor_options: ElastomerTaxelSensorOptions, sensor_idx: int, self._grid_tangent_v = torch.tensor(grid_tangent_v, dtype=gs.tc_float, device=gs.device) self._grid_spacing = torch.tensor(grid_spacing, dtype=gs.tc_float, device=gs.device) + if self._use_grid_fft and not is_grid_regular: + gs.logger.warning( + "ElastomerTaxel grid is not strictly regular (uniform spacing, uniform normals, orthogonal " + "tangents); FFT dilation will use averaged spacing and normal as a best-fit approximation." + ) + def build(self): super().build() diff --git a/genesis/engine/sensors/tactile_shared.py b/genesis/engine/sensors/tactile_shared.py index 7613db64a1..df430a8ac4 100644 --- a/genesis/engine/sensors/tactile_shared.py +++ b/genesis/engine/sensors/tactile_shared.py @@ -124,17 +124,23 @@ def expand_probe_normals(normals: np.ndarray, n_probes: int, probe_shape: tuple[ def normalize_grid_probe_layout( probe_pos: np.ndarray, probe_normals: np.ndarray, is_grid: bool -) -> tuple[np.ndarray, np.ndarray, bool, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, np.ndarray, bool, bool, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """ Validate a probe layout and extract grid-FFT metadata when the layout qualifies. - Returns ``(flat_positions, flat_normals, use_grid_fft, grid_normal, tangent_u, tangent_v, grid_spacing)``. When - the layout is flat (``is_grid=False``) or fails any grid-FFT precondition, ``use_grid_fft`` is False and the - tangent / spacing entries are zero. 
+ Returns ``(flat_positions, flat_normals, use_grid_fft, is_grid_regular, grid_normal, tangent_u, tangent_v, + grid_spacing)``. - Grid-FFT preconditions: shape ``(ny, nx, 3)`` with ``ny, nx >= 2``, normals uniform within tolerance, tangents - orthogonal, both tangents in the plane perpendicular to the normal, and all interior probes laid out on a - regular ``(spacing_u, spacing_v)`` rectangle. + ``use_grid_fft`` is True when the layout has shape ``(ny, nx, 3)`` with ``ny, nx >= 2`` and non-degenerate + spacing along both axes -- the FFT path is usable and the grid metadata is populated as a best-fit + approximation (average step vectors over all adjacent pairs, average unit normal over all probes). + + ``is_grid_regular`` is True when, in addition, the layout is strictly regular: normals uniform within + tolerance, tangents orthogonal, both tangents in the plane perpendicular to the normal, and all probes lie + on the regular ``(spacing_u, spacing_v)`` rectangle implied by the averaged steps. Callers that proceed + with FFT on an irregular layout (``use_grid_fft`` and not ``is_grid_regular``) should warn the user. + + When ``use_grid_fft`` is False, the tangent / spacing / normal entries are zero. """ probe_shape = probe_pos.shape[:-1] flat = probe_pos.reshape(-1, 3) @@ -146,6 +152,7 @@ def normalize_grid_probe_layout( normals = normals / normal_norms[:, None] use_grid_fft = False + is_grid_regular = False grid_normal = np.zeros(3, dtype=gs.np_float) tangent_u = np.zeros(3, dtype=gs.np_float) tangent_v = np.zeros(3, dtype=gs.np_float) @@ -157,14 +164,23 @@ def normalize_grid_probe_layout( ny, nx = int(probe_shape[0]), int(probe_shape[1]) if nx >= 2 and ny >= 2: grid = probe_pos.reshape(ny, nx, 3) - step_u = grid[0, 1] - grid[0, 0] - step_v = grid[1, 0] - grid[0, 0] - spacing_u = float(np.linalg.norm(step_u)) - spacing_v = float(np.linalg.norm(step_v)) + # Averaged step vectors across all adjacent pairs along each axis -- robust to local jitter. 
+ avg_step_u = (grid[:, 1:, :] - grid[:, :-1, :]).reshape(-1, 3).mean(axis=0) + avg_step_v = (grid[1:, :, :] - grid[:-1, :, :]).reshape(-1, 3).mean(axis=0) + spacing_u = float(np.linalg.norm(avg_step_u)) + spacing_v = float(np.linalg.norm(avg_step_v)) if spacing_u >= gs.EPS and spacing_v >= gs.EPS: - tangent_u_candidate = (step_u / spacing_u).astype(gs.np_float) - tangent_v_candidate = (step_v / spacing_v).astype(gs.np_float) - normal_candidate = normals[0].astype(gs.np_float, copy=False) + tangent_u_candidate = (avg_step_u / spacing_u).astype(gs.np_float) + tangent_v_candidate = (avg_step_v / spacing_v).astype(gs.np_float) + # Average unit normal across all probes. If they cancel out (e.g. opposing normals), fall back + # to the first probe's normal so downstream FFT still has a defined orientation. + avg_normal = normals.mean(axis=0) + normal_norm = float(np.linalg.norm(avg_normal)) + if normal_norm < gs.EPS: + normal_candidate = normals[0].astype(gs.np_float, copy=False) + else: + normal_candidate = (avg_normal / normal_norm).astype(gs.np_float) + normals_are_uniform = bool(np.all(normals @ normal_candidate >= 1.0 - _GRID_TOL)) axes_are_orthogonal = abs(float(tangent_u_candidate @ tangent_v_candidate)) <= _GRID_TOL axes_in_plane = ( @@ -173,21 +189,23 @@ def normalize_grid_probe_layout( ) expected = ( grid[0, 0] - + np.arange(nx, dtype=gs.np_float)[None, :, None] * step_u[None, None, :] - + np.arange(ny, dtype=gs.np_float)[:, None, None] * step_v[None, None, :] + + np.arange(nx, dtype=gs.np_float)[None, :, None] * avg_step_u[None, None, :] + + np.arange(ny, dtype=gs.np_float)[:, None, None] * avg_step_v[None, None, :] ) is_regular = bool(np.max(np.linalg.norm(grid - expected, axis=-1)) <= _GRID_TOL) - use_grid_fft = normals_are_uniform and axes_are_orthogonal and axes_in_plane and is_regular - if use_grid_fft: - grid_normal = normal_candidate - tangent_u = tangent_u_candidate - tangent_v = tangent_v_candidate - grid_spacing = np.array((spacing_u, spacing_v), 
dtype=gs.np_float) + + use_grid_fft = True + is_grid_regular = normals_are_uniform and axes_are_orthogonal and axes_in_plane and is_regular + grid_normal = normal_candidate + tangent_u = tangent_u_candidate + tangent_v = tangent_v_candidate + grid_spacing = np.array((spacing_u, spacing_v), dtype=gs.np_float) return ( flat.astype(gs.np_float, copy=False), normals.astype(gs.np_float, copy=False), use_grid_fft, + is_grid_regular, grid_normal.astype(gs.np_float, copy=False), tangent_u.astype(gs.np_float, copy=False), tangent_v.astype(gs.np_float, copy=False), From 077c19c86cf99a6508a8e1ba3608a96fd1e07f6e Mon Sep 17 00:00:00 2001 From: Trinity Chung Date: Mon, 25 May 2026 17:55:28 -0400 Subject: [PATCH 4/7] crosstalk reduce mem usage --- genesis/engine/sensors/kinematic_tactile.py | 227 ++++++++++---------- genesis/options/sensors/tactile.py | 6 +- 2 files changed, 111 insertions(+), 122 deletions(-) diff --git a/genesis/engine/sensors/kinematic_tactile.py b/genesis/engine/sensors/kinematic_tactile.py index ade9304931..a61ae0aa11 100644 --- a/genesis/engine/sensors/kinematic_tactile.py +++ b/genesis/engine/sensors/kinematic_tactile.py @@ -1,10 +1,11 @@ import math -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import TYPE_CHECKING, Generic, NamedTuple, TypeVar import numpy as np import quadrants as qd import torch +import torch.nn.functional as F import genesis as gs import genesis.utils.array_class as array_class @@ -27,12 +28,9 @@ get_measured_bufs, ) from .tactile_shared import ( - GridFFTConvMetadataMixin, ViscoelasticHysteresisMetadataMixin, ViscoelasticHysteresisMixin, - next_pow2, normalize_grid_probe_layout, - register_grid_fft_sensor, ) if TYPE_CHECKING: @@ -582,7 +580,6 @@ class KinematicTaxelData(NamedTuple): @dataclass class KinematicTaxelMetadata( ViscoelasticHysteresisMetadataMixin, - GridFFTConvMetadataMixin, ProbesWithNormalSensorMetadataMixin, RigidSensorMetadataMixin, SimpleSensorMetadata, @@ -593,108 +590,104 
@@ class KinematicTaxelMetadata( shear_scalar: torch.Tensor = make_tensor_field((0,)) twist_scalar: torch.Tensor = make_tensor_field((0,)) - # Spatial crosstalk reuses the shared ``GridFFTConvMetadataMixin`` state. ``grid_fft_meta`` tuples for this - # sensor are ``(sensor_idx, g_ny, g_nx, probe_start, cache_start, sigma, strength, spacing_u, spacing_v)``; - # the kernel is a combined ``(1 - strength) * identity + strength * Gaussian / sum(Gaussian)`` blur and the - # per-step buffer has 6 channels (force xyz + torque xyz). + # Per-sensor spatial crosstalk state. Each entry of ``crosstalk_meta`` is + # ``(g_ny, g_nx, probe_start, cache_start, strength, r_v, r_u)``; ``crosstalk_kernels_{v,u}`` hold the matching + # depthwise 1D Gaussian weights pre-shaped for ``F.conv2d`` with ``groups=6``. The 2D Gaussian is applied as two + # separable depthwise passes (kv along v-axis then ku along u-axis) on a per-sensor ``(B, 6, g_ny, g_nx)`` + # temporary, with an identity blend ``(1 - strength) * x + strength * conv(x)``. No persistent buffer is held. + crosstalk_meta: list[tuple] = field(default_factory=list) + crosstalk_kernels_v: list[torch.Tensor] = field(default_factory=list) + crosstalk_kernels_u: list[torch.Tensor] = field(default_factory=list) + any_crosstalk: bool = False -@torch.jit.script -def _precompute_crosstalk_kernel_fft( +def _build_separable_crosstalk_kernels( sigma: float, - strength: float, - grid_spacing: tuple[float, float], - fft_n: tuple[int, int], + spacing_u: float, + spacing_v: float, device: torch.device, dtype: torch.dtype, -) -> torch.Tensor: - """Combined ``(1 - strength) * identity + strength * Gaussian/sum(Gaussian)`` kernel, real-FFT'd. - - Kernel is centered on the FFT origin via ``ifftshift`` so circular convolution is equivalent to convolution with - a kernel anchored at the taxel itself. 
The Gaussian is L1-normalized so a uniform field passes through unchanged - (DC bin = 1); the identity-blend keeps the response peaked at the source taxel and the rest leaked into the - Gaussian skirt. The output is a complex ``(fft_n[0], fft_n[1] // 2 + 1)`` half-spectrum ready to multiply against - ``rfft2(field)``. +) -> tuple[torch.Tensor, torch.Tensor, int, int]: + """Build two L1-normalized depthwise 1D Gaussian kernels for separable crosstalk convolution. + + A 2D isotropic Gaussian is the outer product of two 1D Gaussians, so it is applied as two sequential 1D + convolutions (one per axis) on the unpadded active grid. Each 1D kernel has half-radius + ``r = max(1, ceil(3 * sigma / spacing))`` (3-sigma truncation; tail leakage below ~0.3%) and is L1-normalized + after truncation so a uniform field passes through unchanged. + + Returns ``(kernel_v, kernel_u, r_v, r_u)``. The kernels are pre-shaped for ``F.conv2d`` with ``groups=6``: + ``kernel_v`` has shape ``(6, 1, 2*r_v + 1, 1)`` (axis 0 of the field, the v / ny axis) and ``kernel_u`` has + shape ``(6, 1, 1, 2*r_u + 1)`` (axis 1 of the field, the u / nx axis). The kernel is replicated across all 6 + channels (force xyz + torque xyz) so a single ``groups=6`` call covers everything. """ - i = torch.arange(fft_n[0], dtype=dtype, device=device) - j = torch.arange(fft_n[1], dtype=dtype, device=device) - yy, xx = torch.meshgrid((i - fft_n[0] // 2) * grid_spacing[0], (j - fft_n[1] // 2) * grid_spacing[1], indexing="ij") - sigma_t = torch.tensor(sigma, dtype=dtype, device=device) - g = torch.exp(-(xx * xx + yy * yy) / (2.0 * sigma_t * sigma_t)) - g = g / g.sum() - # Identity in centered layout: 1 at the central cell. ``ifftshift`` then aligns it with FFT index 0.
- identity = torch.zeros_like(g) - identity[fft_n[0] // 2, fft_n[1] // 2] = 1.0 - combined = (1.0 - strength) * identity + strength * g - combined = torch.fft.ifftshift(combined, dim=(-2, -1)) - return torch.fft.rfft2(combined) - - -def _crosstalk_kernel_builder(meta_entry: tuple, fft_n: tuple[int, int]) -> torch.Tensor: - """``register_grid_fft_sensor`` kernel builder for spatial crosstalk: 1 plane (identity-blended Gaussian). - - ``meta_entry`` is ``(sensor_idx, g_ny, g_nx, probe_start, cache_start, sigma, strength, spacing_u, spacing_v)``. - The crosstalk kernel's axis 0 spans ny / tangent_v and axis 1 spans nx / tangent_u, so spacing is passed as - ``(spacing_v, spacing_u)``. - """ - _, _, _, _, _, sigma, strength, spacing_u, spacing_v = meta_entry - k = _precompute_crosstalk_kernel_fft(sigma, strength, (spacing_v, spacing_u), fft_n, gs.device, gs.tc_float) - return k.unsqueeze(0) # (1, fft_ny, fft_nx) -- single kernel plane - - -def _kinematic_taxel_grid_fft_crosstalk( - grid_fft_meta: list[tuple], - grid_fft_kernels_stacked: torch.Tensor, + r_v = max(1, int(math.ceil(3.0 * sigma / spacing_v))) + r_u = max(1, int(math.ceil(3.0 * sigma / spacing_u))) + iv = torch.arange(2 * r_v + 1, dtype=dtype, device=device) - r_v + iu = torch.arange(2 * r_u + 1, dtype=dtype, device=device) - r_u + gv = torch.exp(-((iv * spacing_v) ** 2) / (2.0 * sigma * sigma)) + gu = torch.exp(-((iu * spacing_u) ** 2) / (2.0 * sigma * sigma)) + gv = gv / gv.sum() + gu = gu / gu.sum() + kernel_v = gv.view(1, 1, -1, 1).repeat(6, 1, 1, 1) + kernel_u = gu.view(1, 1, 1, -1).repeat(6, 1, 1, 1) + return kernel_v, kernel_u, r_v, r_u + + +def _kinematic_taxel_grid_separable_crosstalk( + crosstalk_meta: list[tuple], + crosstalk_kernels_v: list[torch.Tensor], + crosstalk_kernels_u: list[torch.Tensor], cache_data: torch.Tensor, - grid_fft_buffer: torch.Tensor, probe_radii: torch.Tensor, ) -> None: """ - Apply per-sensor 2D-FFT spatial crosstalk to all 6 channels (force xyz + torque xyz) of every 
registered - grid-crosstalk KinematicTaxel sensor. Mutates ``cache_data`` in place. - - ``cache_data`` is the per-class intermediate cache in ``(B, total_cols)`` layout. Each KinematicTaxel sensor's - slice spans ``2 * n_probes * 3`` columns: 3 force xyz cols per probe, then 3 torque xyz cols per probe. + Apply per-sensor 2D Gaussian spatial crosstalk to all 6 channels (force xyz + torque xyz) of every registered + grid-crosstalk KinematicTaxel sensor as two depthwise 1D convolutions (separable). Mutates ``cache_data`` in + place. + + ``cache_data`` is the per-class intermediate cache in ``(B, total_cols)`` layout. Each sensor's slice spans + ``2 * n_probes * 3`` columns: 3 force xyz cols per probe, then 3 torque xyz cols per probe, both probe-major + (probe flat index ``iy * nx + ix``). Peak per-sensor working memory is ~3 * (B * 6 * g_ny * g_nx * sizeof(float)): + the input field, the intermediate after the first pass, and the blurred output. No persistent buffer is held. """ - if not grid_fft_meta: + if not crosstalk_meta: return B = cache_data.shape[0] - fft_ny, fft_nx = grid_fft_buffer.shape[-2], grid_fft_buffer.shape[-1] - - # 1) Fill the active region of the buffer. The zero-padding region is never written here and stays zero from - # allocation (``register_grid_fft_sensor`` allocates with ``torch.zeros``); the active ``[:g_ny, :g_nx]`` region - # is fully overwritten every step, so no per-step ``zero_()`` is needed. - for grid_pos, (_, g_ny, g_nx, _, cache_start, _, _, _, _) in enumerate(grid_fft_meta): + for (g_ny, g_nx, probe_start, cache_start, strength, r_v, r_u), kv, ku in zip( + crosstalk_meta, crosstalk_kernels_v, crosstalk_kernels_u + ): n_probes = g_ny * g_nx - # Layout in cache: force.xyz for all probes, then torque.xyz for all probes. Each block is ``n_probes * 3`` - # cols, with probe-major ordering matching probe flat index ``iy * nx + ix``. + # Build a contiguous (B, 6, g_ny, g_nx) field: force xyz then torque xyz stacked on the channel axis. 
The + # cache holds probe-major ``iy * nx + ix`` ordering, so reshape (B, n_probes, 3) -> (B, ny, nx, 3) and + # permute -> (B, 3, ny, nx) per group, then concat along channels. ``.contiguous()`` materializes the + # permuted layout that ``F.conv2d`` needs. force_block = cache_data[:, cache_start : cache_start + n_probes * 3] torque_block = cache_data[:, cache_start + n_probes * 3 : cache_start + 2 * n_probes * 3] - # Reshape (B, ny, nx, 3) -> (B, 3, ny, nx); the slice-assignment accepts the non-contiguous permuted view. - grid_fft_buffer[:, grid_pos, 0:3, :g_ny, :g_nx] = force_block.view(B, g_ny, g_nx, 3).permute(0, 3, 1, 2) - grid_fft_buffer[:, grid_pos, 3:6, :g_ny, :g_nx] = torque_block.view(B, g_ny, g_nx, 3).permute(0, 3, 1, 2) - - # 2) Batched real FFT over the last two dims; kernel is per-sensor, broadcast over B and 6 channels. Inputs are - # real so ``rfft2`` (half spectrum) is ~2x cheaper than the full complex ``fft2``. - H_fft = torch.fft.rfft2(grid_fft_buffer) # (B, n_grid_xt, 6, fft_ny, fft_nx // 2 + 1) complex - # Stacked kernels: (n_grid_xt, 1, fft_ny, fft_nx // 2 + 1) -> (1, n_grid_xt, 1, ...) for broadcast. - K = grid_fft_kernels_stacked.unsqueeze(0) - smeared = torch.fft.irfft2(H_fft * K, s=(fft_ny, fft_nx)) # (B, n_grid_xt, 6, fft_ny, fft_nx) - - # 3) Slice each sensor back to its (g_ny, g_nx) grid and write into the cache. - for grid_pos, (_, g_ny, g_nx, probe_start, cache_start, _, _, _, _) in enumerate(grid_fft_meta): - n_probes = g_ny * g_nx - # Zero inactive filler probes (probe_radius == 0): the blur smears neighbour force/torque into their cells. - active = (probe_radii[probe_start : probe_start + n_probes] > 0.0).to(smeared.dtype).view(1, 1, g_ny, g_nx) - force_smeared = smeared[:, grid_pos, 0:3, :g_ny, :g_nx] * active # (B, 3, ny, nx) - torque_smeared = smeared[:, grid_pos, 3:6, :g_ny, :g_nx] * active - # Inverse of the permute used in step 1: (B, 3, ny, nx) -> (B, ny, nx, 3) -> flat (B, ny*nx*3). 
- cache_data[:, cache_start : cache_start + n_probes * 3] = force_smeared.permute(0, 2, 3, 1).reshape( - B, n_probes * 3 + force = force_block.view(B, g_ny, g_nx, 3).permute(0, 3, 1, 2) + torque = torque_block.view(B, g_ny, g_nx, 3).permute(0, 3, 1, 2) + field_in = torch.cat((force, torque), dim=1).contiguous() # (B, 6, g_ny, g_nx) + + # Depthwise separable convolution: one Gaussian per channel via groups=6. ``F.conv2d`` zero-fills its implicit + # ``padding`` border, so out-of-grid taps contribute zero and probes near the edge see no spurious mass from + # beyond the sensor footprint. + tmp = F.conv2d(field_in, kv, groups=6, padding=(r_v, 0)) + blurred = F.conv2d(tmp, ku, groups=6, padding=(0, r_u)) + + # Identity blend: out = (1 - strength) * field_in + strength * blurred. Mathematically equivalent to + # convolving with ``(1 - strength) * delta + strength * Gaussian``. + out = field_in.mul_(1.0 - strength).add_(blurred, alpha=strength) + + # Zero inactive filler probes (probe_radius == 0): the blur leaks neighbour force/torque into their cells. + active = (probe_radii[probe_start : probe_start + n_probes] > 0.0).to(out.dtype).view(1, 1, g_ny, g_nx) + out.mul_(active) + + # Inverse of the build permute: (B, 3, ny, nx) -> (B, ny, nx, 3) -> flat (B, ny*nx*3).
+ cache_data[:, cache_start : cache_start + n_probes * 3] = ( + out[:, 0:3].permute(0, 2, 3, 1).reshape(B, n_probes * 3) + ) + cache_data[:, cache_start + n_probes * 3 : cache_start + 2 * n_probes * 3] = ( + out[:, 3:6].permute(0, 2, 3, 1).reshape(B, n_probes * 3) ) - cache_data[:, cache_start + n_probes * 3 : cache_start + 2 * n_probes * 3] = torque_smeared.permute( - 0, 2, 3, 1 - ).reshape(B, n_probes * 3) CrosstalkSharedMetadataT = TypeVar("CrosstalkSharedMetadataT", bound=KinematicTaxelMetadata) @@ -702,19 +695,20 @@ def _kinematic_taxel_grid_fft_crosstalk( class KinematicTaxelCrosstalkMixin(Generic[CrosstalkSharedMetadataT]): """ - Adds FFT-based spatial crosstalk (Gaussian blur, optionally mixed with identity) to KinematicTaxel on the - measured branch. Operates on all 6 channels (force xyz + torque xyz) of every grid-shaped sensor with - ``crosstalk_strength > 0``. Must come BEFORE ``SimpleSensor`` and AFTER ``ViscoelasticHysteresisMixin`` in MRO - so the data flow is: kernel output -> crosstalk -> hysteresis -> hardware imperfections. + Adds Gaussian spatial crosstalk (optionally mixed with identity) to KinematicTaxel on the measured branch. + Operates on all 6 channels (force xyz + torque xyz) of every grid-shaped sensor with ``crosstalk_strength > 0``. + Must come BEFORE ``SimpleSensor`` and AFTER ``ViscoelasticHysteresisMixin`` in MRO so the data flow is: + kernel output -> crosstalk -> hysteresis -> hardware imperfections. """ _shared_metadata: CrosstalkSharedMetadataT def _register_crosstalk(self): - """Register this sensor for FFT crosstalk via the shared ``register_grid_fft_sensor`` scaffolding. + """Build this sensor's separable Gaussian crosstalk kernels and append them to the shared metadata lists. - Called only when this sensor has a validated grid layout AND ``crosstalk_strength > 0``. The FFT size is - ``(ny, nx)``, padded for the Gaussian tail so circular wrap stays below tolerance. 
+ Called only when this sensor has a validated grid layout AND ``crosstalk_strength > 0``. Stores two + L1-normalized 1D Gaussians (truncated at 3 sigma on each axis, pre-shaped for depthwise ``F.conv2d`` with + ``groups=6``); no persistent per-step buffer is allocated. """ sm = self._shared_metadata sensor_idx = sm.n_probes_per_sensor.shape[0] - 1 # this sensor was just registered @@ -725,18 +719,13 @@ def _register_crosstalk(self): strength = float(self._options.crosstalk_strength) spacing_u = float(self._grid_spacing[0].item()) spacing_v = float(self._grid_spacing[1].item()) - # FFT size per axis: grid extent + the 3-sigma Gaussian tail on each side, rounded up to a power of 2. - # Truncating at 3 sigma leaves circular wraparound below ~0.3% (sub-tolerance for a well-localized blur). - fft_ny = next_pow2(g_ny + 2 * int(math.ceil(3.0 * sigma / spacing_v))) - fft_nx = next_pow2(g_nx + 2 * int(math.ceil(3.0 * sigma / spacing_u))) - register_grid_fft_sensor( - sm, - meta_entry=(sensor_idx, g_ny, g_nx, probe_start, cache_start, sigma, strength, spacing_u, spacing_v), - this_fft_n=(fft_ny, fft_nx), - kernel_builder=_crosstalk_kernel_builder, - n_buffer_channels=6, - batch_size=self._manager._sim._B, + kernel_v, kernel_u, r_v, r_u = _build_separable_crosstalk_kernels( + sigma, spacing_u, spacing_v, gs.device, gs.tc_float ) + sm.crosstalk_meta.append((g_ny, g_nx, probe_start, cache_start, strength, r_v, r_u)) + sm.crosstalk_kernels_v.append(kernel_v) + sm.crosstalk_kernels_u.append(kernel_u) + sm.any_crosstalk = True @classmethod def _apply_transform( @@ -748,13 +737,13 @@ def _apply_transform( is_measured: bool, ): super()._apply_transform(shared_metadata, data, timeline, is_measured=is_measured) - if not is_measured or not shared_metadata.any_grid_fft: + if not is_measured or not shared_metadata.any_crosstalk: return - _kinematic_taxel_grid_fft_crosstalk( - shared_metadata.grid_fft_meta, - shared_metadata.grid_fft_kernels_stacked, + 
_kinematic_taxel_grid_separable_crosstalk( + shared_metadata.crosstalk_meta, + shared_metadata.crosstalk_kernels_v, + shared_metadata.crosstalk_kernels_u, data, - shared_metadata.grid_fft_buffer, shared_metadata.probe_radii, ) @@ -775,11 +764,11 @@ class KinematicTaxelSensor( def __init__(self, sensor_options: KinematicTaxelOptions, sensor_idx: int, sensor_manager: "SensorManager"): super().__init__(sensor_options, sensor_idx, sensor_manager) - # FFT-grid eligibility: requires a 2D probe layout with non-degenerate spacing. Strict regularity - # (uniform normals, orthogonal tangents, exact rectangle) is reported separately as a warning. + # Grid eligibility for spatial crosstalk: requires a 2D probe layout with non-degenerate spacing. Strict + # regularity (uniform normals, orthogonal tangents, exact rectangle) is reported separately as a warning. # Flat pos/normals are already populated by ProbeSensorMixin / ProbesWithNormalSensorMixin. is_grid = len(self._probe_layout_shape) == 2 - _, _, self._use_grid_fft, is_grid_regular, grid_normal, grid_tangent_u, grid_tangent_v, grid_spacing = ( + _, _, self._use_grid_crosstalk, is_grid_regular, grid_normal, grid_tangent_u, grid_tangent_v, grid_spacing = ( normalize_grid_probe_layout( np.asarray(sensor_options.probe_local_pos, dtype=gs.np_float), np.asarray(sensor_options.probe_local_normal, dtype=gs.np_float), @@ -792,7 +781,7 @@ def __init__(self, sensor_options: KinematicTaxelOptions, sensor_idx: int, senso self._grid_spacing = torch.tensor(grid_spacing, dtype=gs.tc_float, device=gs.device) if self._options.crosstalk_strength > 0.0: - if not self._use_grid_fft: + if not self._use_grid_crosstalk: gs.raise_exception( "KinematicTaxel crosstalk requires a 2D grid-shaped probe_local_pos (shape (ny, nx, 3) with " f"ny, nx >= 2 and non-degenerate spacing); got shape {tuple(self._probe_layout_shape)}." 
@@ -800,7 +789,7 @@ def __init__(self, sensor_options: KinematicTaxelOptions, sensor_idx: int, senso if not is_grid_regular: gs.logger.warning( "KinematicTaxel crosstalk grid is not strictly regular (uniform spacing, uniform normals, " - "orthogonal tangents); FFT crosstalk will use averaged spacing and normal as a best-fit " + "orthogonal tangents); crosstalk will use averaged spacing and normal as a best-fit " "approximation." ) @@ -823,7 +812,7 @@ def build(self): self._shared_metadata.twist_scalar, float(self._options.twist_scalar), expand=(1,) ) - if self._options.crosstalk_strength > 0.0 and self._use_grid_fft: + if self._options.crosstalk_strength > 0.0 and self._use_grid_crosstalk: self._register_crosstalk() def _get_return_format(self) -> tuple[tuple[int, ...], ...]: diff --git a/genesis/options/sensors/tactile.py b/genesis/options/sensors/tactile.py index 337a98af6a..e335154bdb 100644 --- a/genesis/options/sensors/tactile.py +++ b/genesis/options/sensors/tactile.py @@ -229,9 +229,9 @@ class KinematicTaxel( If this sensor is attached to a fixed entity, it will not detect contacts with other fixed entities. ``probe_local_pos`` may be either an arbitrary set of probes with shape ``(N, 3)`` or a grid-shaped set with shape - ``(M, N, 3)``. Regular planar grids enable FFT-based spatial crosstalk on the measured branch (see - ``crosstalk_strength``). A probe whose ``probe_radius`` is 0 is treated as an inactive filler -- it reads 0 - force/torque and is skipped -- so an irregular taxel set can be padded into a regular grid for FFT crosstalk. + ``(M, N, 3)``. Regular planar grids enable spatial crosstalk on the measured branch (see ``crosstalk_strength``). + A probe whose ``probe_radius`` is 0 is treated as an inactive filler -- it reads 0 force/torque and is skipped -- + so an irregular taxel set can be padded into a regular grid for crosstalk. 
Parameters ---------- From 99c79afdcf342faad044cb9406fafc74648f0fa2 Mon Sep 17 00:00:00 2001 From: Trinity Chung Date: Mon, 25 May 2026 19:30:23 -0400 Subject: [PATCH 5/7] optimizations --- genesis/engine/sensors/kinematic_tactile.py | 150 +++++-- genesis/engine/sensors/point_cloud_tactile.py | 419 ++++++++---------- .../engine/sensors/surface_distance_probe.py | 304 ++++++++++--- genesis/engine/sensors/tactile_shared.py | 222 +++++++++- 4 files changed, 756 insertions(+), 339 deletions(-) diff --git a/genesis/engine/sensors/kinematic_tactile.py b/genesis/engine/sensors/kinematic_tactile.py index a61ae0aa11..41ea478b9b 100644 --- a/genesis/engine/sensors/kinematic_tactile.py +++ b/genesis/engine/sensors/kinematic_tactile.py @@ -28,6 +28,7 @@ get_measured_bufs, ) from .tactile_shared import ( + ContactPrefilterMetadataMixin, ViscoelasticHysteresisMetadataMixin, ViscoelasticHysteresisMixin, normalize_grid_probe_layout, @@ -44,6 +45,7 @@ @qd.func def _func_query_contact_depth_penetration( i_b: int, + i_s: int, probe_pos: qd.types.vector(3), probe_radius_gt: float, probe_radius_m: float, @@ -51,25 +53,23 @@ def _func_query_contact_depth_penetration( geoms_info: array_class.GeomsInfo, geoms_state: array_class.GeomsState, collider_state: array_class.ColliderState, + sensor_contacts_idx: qd.types.ndarray(), + sensor_n_contacts: qd.types.ndarray(), sdf_info: array_class.SDFInfo, ): """ Max probe penetration from SDF for contacts involving the sensor link, dual-radius. - - Returns ``(max_pen_gt, max_pen_m)`` from a single SDF pass: both penetrations come from the same ``sd`` per contact - via ``pen = radius - sd``. Callers that do not need the noised-radius branch pass ``probe_radius_m == - probe_radius_gt`` and ignore the second return. 
""" max_pen_gt = gs.qd_float(0.0) max_pen_m = gs.qd_float(0.0) - n_contacts = collider_state.n_contacts[i_b] - for i_c in range(n_contacts): - i_col = collider_state.contact_sort_idx[i_c, i_b] - c_link_a = collider_state.contact_data.link_a[i_col, i_b] - c_link_b = collider_state.contact_data.link_b[i_col, i_b] - c_geom_a = collider_state.contact_data.geom_a[i_col, i_b] - c_geom_b = collider_state.contact_data.geom_b[i_col, i_b] + n_c = sensor_n_contacts[i_b, i_s] + for k in range(n_c): + i_c = sensor_contacts_idx[i_b, i_s, k] + c_link_a = collider_state.contact_data.link_a[i_c, i_b] + c_link_b = collider_state.contact_data.link_b[i_c, i_b] + c_geom_a = collider_state.contact_data.geom_a[i_c, i_b] + c_geom_b = collider_state.contact_data.geom_b[i_c, i_b] for side in qd.static(range(2)): c_link = c_link_a if side == 0 else c_link_b @@ -89,9 +89,50 @@ def _func_query_contact_depth_penetration( return max_pen_gt, max_pen_m +# Per-(env, sensor) cap on the prefiltered contact list consumed by ``_func_query_contact_depth`` +# and ``_func_query_contact_depth_penetration``. Sensors track a single rigid link; even with multicontact +# and many neighbouring geoms, the count of contacts touching one link rarely exceeds a few hundred. +_MAX_CONTACTS_PER_SENSOR = 1024 + + +@qd.kernel +def _kernel_build_sensor_contact_idx( + sensor_link_idx: qd.types.ndarray(), + collider_state: array_class.ColliderState, + sensor_contacts_idx: qd.types.ndarray(), + sensor_n_contacts: qd.types.ndarray(), +): + """ + Per-(env, sensor) compact contact index for the KinematicTaxel pre-pass. + + Parallelizes over ``(n_batches, n_sensors)``: each thread scans the collider's contact list once and writes + the indices of contacts whose ``link_a`` or ``link_b`` equals the sensor's tracked link. Drops the main + kernel's per-probe contact-list scan from O(n_probes * n_contacts) to O(n_probes * sensor_n_contacts). 
+ Cap-overflows (count >= last dim of ``sensor_contacts_idx``) silently truncate; see the module-level + ``_MAX_CONTACTS_PER_SENSOR`` comment. + """ + n_sensors = sensor_link_idx.shape[0] + n_batches = sensor_n_contacts.shape[0] + max_per_sensor = sensor_contacts_idx.shape[2] + for i_b, i_s in qd.ndrange(n_batches, n_sensors): + link = sensor_link_idx[i_s] + count = gs.qd_int(0) + n_c = collider_state.n_contacts[i_b] + for i_c in range(n_c): + if count >= max_per_sensor: + break + la = collider_state.contact_data.link_a[i_c, i_b] + lb = collider_state.contact_data.link_b[i_c, i_b] + if la == link or lb == link: + sensor_contacts_idx[i_b, i_s, count] = i_c + count = count + 1 + sensor_n_contacts[i_b, i_s] = count + + @qd.func def _func_query_contact_depth( i_b: int, + i_s: int, probe_pos: qd.types.vector(3), probe_radius_gt: float, probe_radius_m: float, @@ -101,15 +142,18 @@ def _func_query_contact_depth( rigid_global_info: array_class.RigidGlobalInfo, collider_static_config: qd.template(), collider_state: array_class.ColliderState, + sensor_contacts_idx: qd.types.ndarray(), + sensor_n_contacts: qd.types.ndarray(), sdf_info: array_class.SDFInfo, eps: float, ): """ Dual-radius probe query: single SDF + normal pass yielding both GT and noised-radius results. - Returns ``(max_pen_gt, contact_link_gt, contact_normal_gt, max_pen_m, contact_link_m, contact_normal_m)``. AABB - pre-filter expands by ``max(probe_radius_gt, probe_radius_m)`` so neither branch is silently skipped. Callers - without a noised radius pass ``probe_radius_m == probe_radius_gt``. + Iterates only the per-(env, sensor) prefiltered contact list built by ``_kernel_build_sensor_contact_idx``; + every contact in that list has ``link_a`` or ``link_b`` equal to ``sensor_link_idx``. AABB pre-filter + expands by ``max(probe_radius_gt, probe_radius_m)`` so neither branch is silently skipped. Callers without + a noised radius pass ``probe_radius_m == probe_radius_gt``. 
""" max_pen_gt = gs.qd_float(0.0) contact_link_gt = gs.qd_int(-1) @@ -119,16 +163,16 @@ def _func_query_contact_depth( contact_normal_m = qd.Vector.zero(gs.qd_float, 3) aabb_expansion = qd.max(probe_radius_gt, probe_radius_m) - # Iterate over contacts directly from collider state; each contact may have the sensor link on either side. - n_contacts = collider_state.n_contacts[i_b] - for i_c in range(n_contacts): - i_col = collider_state.contact_sort_idx[i_c, i_b] - c_link_a = collider_state.contact_data.link_a[i_col, i_b] - c_link_b = collider_state.contact_data.link_b[i_col, i_b] - c_geom_a = collider_state.contact_data.geom_a[i_col, i_b] - c_geom_b = collider_state.contact_data.geom_b[i_col, i_b] - - # Check if either side of this contact involves the sensor link. + n_c = sensor_n_contacts[i_b, i_s] + for k in range(n_c): + i_c = sensor_contacts_idx[i_b, i_s, k] + c_link_a = collider_state.contact_data.link_a[i_c, i_b] + c_link_b = collider_state.contact_data.link_b[i_c, i_b] + c_geom_a = collider_state.contact_data.geom_a[i_c, i_b] + c_geom_b = collider_state.contact_data.geom_b[i_c, i_b] + + # Each prefiltered contact touches the sensor link on at least one side; check both since the link + # may appear as link_a, link_b, or (degenerately) both. 
for side in qd.static(range(2)): c_link = c_link_a if side == 0 else c_link_b i_g = c_geom_b if side == 0 else c_geom_a @@ -226,6 +270,8 @@ def _kernel_kinematic_taxel( sensor_cache_start: qd.types.ndarray(), sensor_probe_start: qd.types.ndarray(), n_probes_per_sensor: qd.types.ndarray(), + sensor_contacts_idx: qd.types.ndarray(), + sensor_n_contacts: qd.types.ndarray(), collider_state: array_class.ColliderState, collider_static_config: qd.template(), links_state: array_class.LinksState, @@ -284,6 +330,7 @@ def _kernel_kinematic_taxel( contact_normal_m, ) = _func_query_contact_depth( i_b, + i_s, probe_pos, probe_radius, probe_radius_m, @@ -293,6 +340,8 @@ def _kernel_kinematic_taxel( rigid_global_info, collider_static_config, collider_state, + sensor_contacts_idx, + sensor_n_contacts, sdf_info, eps, ) @@ -355,6 +404,8 @@ def _kernel_contact_depth_probe( links_idx: qd.types.ndarray(), sensor_cache_start: qd.types.ndarray(), sensor_probe_start: qd.types.ndarray(), + sensor_contacts_idx: qd.types.ndarray(), + sensor_n_contacts: qd.types.ndarray(), collider_state: array_class.ColliderState, links_state: array_class.LinksState, geoms_state: array_class.GeomsState, @@ -394,6 +445,7 @@ def _kernel_contact_depth_probe( max_penetration_gt, max_penetration_m = _func_query_contact_depth_penetration( i_b, + i_s, probe_pos, probe_radius, probe_radius_m, @@ -401,6 +453,8 @@ def _kernel_contact_depth_probe( geoms_info, geoms_state, collider_state, + sensor_contacts_idx, + sensor_n_contacts, sdf_info, ) # Per-(env, probe) gain on the measured-branch depth only. 
@@ -420,6 +474,7 @@ def build(self): class ContactDepthProbeMetadata( ViscoelasticHysteresisMetadataMixin, ProbeSensorMetadataMixin, + ContactPrefilterMetadataMixin, RigidSensorMetadataMixin, SimpleSensorMetadata, ): @@ -432,7 +487,19 @@ class ContactDepthProbeSensor( RigidSensorMixin[ContactDepthProbeMetadata], SimpleSensor[ContactDepthProbeOptions, ContactDepthProbeMetadata, tuple], ): - """Returns contact depth in meters per probe.""" + """ + Returns contact depth in meters per probe. + """ + + def build(self): + super().build() + # Re-allocate the per-(env, sensor) contact prefilter buffers to absorb the newly-registered sensor. + B = self._manager._sim._B + n_sensors_built = self._shared_metadata.n_probes_per_sensor.shape[0] + self._shared_metadata.sensor_contacts_idx = torch.zeros( + (B, n_sensors_built, _MAX_CONTACTS_PER_SENSOR), dtype=gs.tc_int, device=gs.device + ) + self._shared_metadata.sensor_n_contacts = torch.zeros((B, n_sensors_built), dtype=gs.tc_int, device=gs.device) def _get_return_format(self) -> tuple[int, ...]: return self._probe_layout_shape @@ -453,6 +520,12 @@ def _update_current_timestep_data( measured, measured_cols_b = get_measured_bufs( shared_metadata, current_ground_truth_data_T, measured_data_timeline ) + _kernel_build_sensor_contact_idx( + shared_metadata.links_idx, + solver.collider._collider_state, + shared_metadata.sensor_contacts_idx, + shared_metadata.sensor_n_contacts, + ) _kernel_contact_depth_probe( shared_metadata.probe_positions, shared_metadata.probe_sensor_idx, @@ -462,6 +535,8 @@ def _update_current_timestep_data( shared_metadata.links_idx, shared_metadata.sensor_cache_start, shared_metadata.sensor_probe_start, + shared_metadata.sensor_contacts_idx, + shared_metadata.sensor_n_contacts, solver.collider._collider_state, solver.links_state, solver.geoms_state, @@ -581,6 +656,7 @@ class KinematicTaxelData(NamedTuple): class KinematicTaxelMetadata( ViscoelasticHysteresisMetadataMixin, ProbesWithNormalSensorMetadataMixin, + 
ContactPrefilterMetadataMixin, RigidSensorMetadataMixin, SimpleSensorMetadata, ): @@ -590,11 +666,10 @@ class KinematicTaxelMetadata( shear_scalar: torch.Tensor = make_tensor_field((0,)) twist_scalar: torch.Tensor = make_tensor_field((0,)) - # Per-sensor spatial crosstalk state. Each entry of ``crosstalk_meta`` is + # Per-sensor spatial crosstalk state. ``crosstalk_meta[i]`` is # ``(g_ny, g_nx, probe_start, cache_start, strength, r_v, r_u)``; ``crosstalk_kernels_{v,u}`` hold the matching - # depthwise 1D Gaussian weights pre-shaped for ``F.conv2d`` with ``groups=6``. The 2D Gaussian is applied as two - # separable depthwise passes (kv along v-axis then ku along u-axis) on a per-sensor ``(B, 6, g_ny, g_nx)`` - # temporary, with an identity blend ``(1 - strength) * x + strength * conv(x)``. No persistent buffer is held. + # depthwise 1D Gaussian weights pre-shaped for ``F.conv2d`` with ``groups=6``. Applied as two separable + # passes (kv then ku) with an identity blend ``(1 - strength) * x + strength * conv(x)``. crosstalk_meta: list[tuple] = field(default_factory=list) crosstalk_kernels_v: list[torch.Tensor] = field(default_factory=list) crosstalk_kernels_u: list[torch.Tensor] = field(default_factory=list) @@ -815,6 +890,15 @@ def build(self): if self._options.crosstalk_strength > 0.0 and self._use_grid_crosstalk: self._register_crosstalk() + # Re-allocate the per-(env, sensor) contact prefilter buffers to absorb the newly-registered sensor. + # Sized at build time; the per-step kernel writes into the same buffers without further allocation. 
+ B = self._manager._sim._B + n_sensors_built = self._shared_metadata.n_probes_per_sensor.shape[0] + self._shared_metadata.sensor_contacts_idx = torch.zeros( + (B, n_sensors_built, _MAX_CONTACTS_PER_SENSOR), dtype=gs.tc_int, device=gs.device + ) + self._shared_metadata.sensor_n_contacts = torch.zeros((B, n_sensors_built), dtype=gs.tc_int, device=gs.device) + def _get_return_format(self) -> tuple[tuple[int, ...], ...]: shape = (*self._probe_layout_shape, 3) return shape, shape @@ -840,6 +924,12 @@ def _update_current_timestep_data( measured_equals_gt = int( not shared_metadata.has_any_probe_radius_noise and not shared_metadata.has_any_probe_gain ) + _kernel_build_sensor_contact_idx( + shared_metadata.links_idx, + solver.collider._collider_state, + shared_metadata.sensor_contacts_idx, + shared_metadata.sensor_n_contacts, + ) _kernel_kinematic_taxel( shared_metadata.probe_positions, shared_metadata.probe_sensor_idx, @@ -855,6 +945,8 @@ def _update_current_timestep_data( shared_metadata.sensor_cache_start, shared_metadata.sensor_probe_start, shared_metadata.n_probes_per_sensor, + shared_metadata.sensor_contacts_idx, + shared_metadata.sensor_n_contacts, solver.collider._collider_state, solver.collider._collider_static_config, solver.links_state, diff --git a/genesis/engine/sensors/point_cloud_tactile.py b/genesis/engine/sensors/point_cloud_tactile.py index e20908d375..d5d6d34c9a 100644 --- a/genesis/engine/sensors/point_cloud_tactile.py +++ b/genesis/engine/sensors/point_cloud_tactile.py @@ -24,9 +24,17 @@ get_measured_bufs, ) from .tactile_shared import ( + BVH_LEAF_SIZE, + BVH_STACK_SIZE, + BVHMetadata, GridFFTConvMetadataMixin, ViscoelasticHysteresisMetadataMixin, ViscoelasticHysteresisMixin, + build_static_chunk_bvh, + func_aabb_intersects_aabb, + func_sphere_intersects_aabb, + func_vec3_at, + get_mesh_geom_chunks, next_pow2, normalize_grid_probe_layout, register_grid_fft_sensor, @@ -40,30 +48,6 @@ from .sensor_manager import SensorManager -def 
_get_mesh_geom_chunks(link, prefer_visual: bool) -> list[tuple[object, np.ndarray, np.ndarray]]: - """Return per-geom mesh chunks in link-local frame.""" - if prefer_visual: - geoms = list(link.vgeoms) if link.vgeoms else list(link.geoms) - use_vverts = bool(link.vgeoms) - else: - geoms = list(link.geoms) if link.geoms else list(link.vgeoms) - use_vverts = not bool(link.geoms) and bool(link.vgeoms) - - chunks: list[tuple[object, np.ndarray, np.ndarray]] = [] - for geom in geoms: - if use_vverts: - verts = np.asarray(geom.init_vverts, dtype=np.float32) - faces = np.asarray(geom.init_vfaces, dtype=np.int32) - else: - verts = np.asarray(geom.init_verts, dtype=np.float32) - faces = np.asarray(geom.init_faces, dtype=np.int32) - if verts.size == 0 or faces.size == 0: - continue - verts_link = gu.transform_by_trans_quat(verts, geom.init_pos, geom.init_quat) - chunks.append((geom, verts_link.astype(np.float32, copy=False), np.asarray(faces, dtype=np.int32))) - return chunks - - def _n_sample_points_per_link(n_sample_points: int | list | tuple, n_links: int) -> list[int]: if n_links <= 0: return [] @@ -143,7 +127,7 @@ def _sample_track_links_point_cloud_tensors( n_pts = n_per_link[i_l] link_idx = int(track_link_idx[i_l]) link = solver.links[link_idx] - geom_chunks = _get_mesh_geom_chunks(link, prefer_visual) + geom_chunks = get_mesh_geom_chunks(link, prefer_visual) if not geom_chunks: gs.raise_exception(f"No mesh geometry on tracked link index {link_idx}.") for n_geom_pts, (geom, verts, faces) in zip(_split_count_by_area(n_pts, geom_chunks), geom_chunks): @@ -172,116 +156,27 @@ def _sample_track_links_point_cloud_tensors( ) -_POINT_CLOUD_BVH_LEAF_SIZE = 8 -_POINT_CLOUD_BVH_STACK_SIZE = 32 _ELASTOMER_QUERY_AABB_MARGIN = 1e-3 -def _build_static_chunk_bvh(points: np.ndarray, global_rows: np.ndarray, leaf_size: int): - """Median-split AABB BVH over ``points`` (a single tracked link's local-frame point cloud). 
- - Leaves carry the caller-provided ``global_rows`` (absolute rows into ``pc_pos_link``); the - kernel can therefore index directly into the per-class point-cloud tensors with no extra - indirection. Internal nodes use -1 for ``node_left`` / ``node_right``. - """ - node_min: list[np.ndarray] = [] - node_max: list[np.ndarray] = [] - node_left: list[int] = [] - node_right: list[int] = [] - node_point_start: list[int] = [] - node_point_n: list[int] = [] - point_idx: list[int] = [] - - def _alloc() -> int: - i = len(node_min) - node_min.append(np.zeros(3, dtype=np.float32)) - node_max.append(np.zeros(3, dtype=np.float32)) - node_left.append(-1) - node_right.append(-1) - node_point_start.append(-1) - node_point_n.append(0) - return i - - def _build(rows: np.ndarray, pts: np.ndarray) -> int: - nid = _alloc() - bmin = pts.min(axis=0).astype(np.float32) - bmax = pts.max(axis=0).astype(np.float32) - node_min[nid] = bmin - node_max[nid] = bmax - if rows.shape[0] <= leaf_size: - start = len(point_idx) - point_idx.extend(int(r) for r in rows) - node_point_start[nid] = start - node_point_n[nid] = int(rows.shape[0]) - return nid - axis = int(np.argmax(bmax - bmin)) - order = np.argsort(pts[:, axis], kind="stable") - mid = order.shape[0] // 2 - node_left[nid] = _build(rows[order[:mid]], pts[order[:mid]]) - node_right[nid] = _build(rows[order[mid:]], pts[order[mid:]]) - return nid - - if points.shape[0] == 0: - return ( - np.zeros((0, 3), dtype=np.float32), - np.zeros((0, 3), dtype=np.float32), - np.zeros((0,), dtype=np.int32), - np.zeros((0,), dtype=np.int32), - np.zeros((0,), dtype=np.int32), - np.zeros((0,), dtype=np.int32), - np.zeros((0,), dtype=np.int32), - ) - - root = _build(global_rows.astype(np.int32, copy=False), points.astype(np.float32, copy=False)) - assert root == 0 - return ( - np.stack(node_min, axis=0), - np.stack(node_max, axis=0), - np.asarray(node_left, dtype=np.int32), - np.asarray(node_right, dtype=np.int32), - np.asarray(node_point_start, dtype=np.int32), 
- np.asarray(node_point_n, dtype=np.int32), - np.asarray(point_idx, dtype=np.int32), - ) - - @dataclass -class PointCloudBVH: - """Static link-local BVH over the tracked point clouds of one sensor class. - - One chunk per (sensor, tracked_link); each chunk is built once in that link's LOCAL frame at - scene-build time and never rebuilt. Queries must transform into a chunk's link-local frame at - query time. Chunk nodes are flat-packed across all chunks: chunk_node_start/n delimits each - chunk's contiguous run; node_left/right are ABSOLUTE indices into the flat node tensors (-1 - for leaves). Leaves' point_idx values are absolute rows into pc_pos_link / pc_active_envs_mask - / pc_normal_link, so a leaf hit resolves to per-point data with one indirection. +class PointCloudBVH(BVHMetadata): + """ + BVH over the tracked point clouds of one sensor class. ``leaf_elem_idx`` entries are absolute + rows into ``pc_pos_link`` / ``pc_active_envs_mask`` / ``pc_normal_link`` so a leaf hit resolves + to per-point data with one indirection. See ``BVHMetadata`` for the shared scaffolding semantics. """ - sensor_chunk_start: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) - sensor_chunk_n: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) - - chunk_link_idx: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) - # Inverse of sensor_chunk_start/n: chunk_sensor_idx[i_c] is the owning sensor's index. Enables - # (env, chunk)-parallel kernels without redundant per-thread scans of sensor_chunk_start. + # Inverse of sensor_chunk_start/count: chunk_sensor_idx[i_c] is the owning sensor's index. Enables + # (env, chunk)-parallel kernels (e.g. ElastomerTaxel surface state) without rescanning sensor_chunk_start + # in every thread; ProximityTaxel parallelizes per-probe and does not consume this field. 
chunk_sensor_idx: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) - chunk_node_start: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) - chunk_node_n: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) - - node_min: torch.Tensor = make_tensor_field((0, 3)) - node_max: torch.Tensor = make_tensor_field((0, 3)) - node_left: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) - node_right: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) - node_point_start: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) - node_point_n: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) - - point_idx: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) def append_sensor(self, *, pc_start_row: int, idx_cat: torch.Tensor, pos_cat: torch.Tensor) -> None: - """Build per-tracked-link chunks for one sensor and append into the flat tensors. - - Must be called immediately after extending ``pc_pos_link`` by ``pos_cat`` — each leaf's - ``point_idx`` entry is ``pc_start_row + local_row`` and must address the freshly-grown - rows. + """ + Build per-tracked-link chunks for one sensor and append into the flat tensors. Must be called + immediately after extending ``pc_pos_link`` by ``pos_cat`` so each leaf's element index + (``pc_start_row + local_row``) addresses the freshly-grown rows.
""" n_local = int(pos_cat.shape[0]) if n_local == 0: @@ -293,40 +188,42 @@ def append_sensor(self, *, pc_start_row: int, idx_cat: torch.Tensor, pos_cat: to chunk_start_for_sensor = int(self.chunk_link_idx.shape[0]) node_offset = int(self.node_min.shape[0]) - point_offset = int(self.point_idx.shape[0]) + point_offset = int(self.leaf_elem_idx.shape[0]) new_chunk_link_idx: list[int] = [] new_chunk_node_start: list[int] = [] - new_chunk_node_n: list[int] = [] + new_chunk_node_count: list[int] = [] all_node_min: list[np.ndarray] = [] all_node_max: list[np.ndarray] = [] all_node_left: list[np.ndarray] = [] all_node_right: list[np.ndarray] = [] - all_node_point_start: list[np.ndarray] = [] - all_node_point_n: list[np.ndarray] = [] - all_point_idx: list[np.ndarray] = [] + all_node_leaf_start: list[np.ndarray] = [] + all_node_leaf_count: list[np.ndarray] = [] + all_leaf_elem_idx: list[np.ndarray] = [] for link_idx in unique_links: local_rows = np.nonzero(idx_np == int(link_idx))[0].astype(np.int32) global_rows = (int(pc_start_row) + local_rows).astype(np.int32) pts_link = pos_np[local_rows] - nmin, nmax, nleft, nright, npstart, npn, pidx = _build_static_chunk_bvh( - pts_link, global_rows, _POINT_CLOUD_BVH_LEAF_SIZE + # Point cloud: AABB per element is degenerate (the point itself), so pass the points as both + # centroids and the per-element min/max bounds. + nmin, nmax, nleft, nright, npstart, npn, pidx = build_static_chunk_bvh( + pts_link, pts_link, pts_link, global_rows, BVH_LEAF_SIZE ) new_chunk_link_idx.append(int(link_idx)) new_chunk_node_start.append(node_offset) - new_chunk_node_n.append(int(nmin.shape[0])) + new_chunk_node_count.append(int(nmin.shape[0])) all_node_min.append(nmin) all_node_max.append(nmax) # Rebase intra-chunk child / leaf-start indices into the flat tensors' absolute space. 
all_node_left.append(np.where(nleft >= 0, nleft + node_offset, nleft).astype(np.int32)) all_node_right.append(np.where(nright >= 0, nright + node_offset, nright).astype(np.int32)) - all_node_point_start.append(np.where(npn > 0, npstart + point_offset, npstart).astype(np.int32)) - all_node_point_n.append(npn) - all_point_idx.append(pidx) + all_node_leaf_start.append(np.where(npn > 0, npstart + point_offset, npstart).astype(np.int32)) + all_node_leaf_count.append(npn) + all_leaf_elem_idx.append(pidx) node_offset += int(nmin.shape[0]) point_offset += int(pidx.shape[0]) @@ -335,12 +232,12 @@ def append_sensor(self, *, pc_start_row: int, idx_cat: torch.Tensor, pos_cat: to nx = torch.tensor(np.concatenate(all_node_max, axis=0), dtype=gs.tc_float, device=gs.device) nl = torch.tensor(np.concatenate(all_node_left, axis=0), dtype=gs.tc_int, device=gs.device) nr = torch.tensor(np.concatenate(all_node_right, axis=0), dtype=gs.tc_int, device=gs.device) - nps = torch.tensor(np.concatenate(all_node_point_start, axis=0), dtype=gs.tc_int, device=gs.device) - npn_t = torch.tensor(np.concatenate(all_node_point_n, axis=0), dtype=gs.tc_int, device=gs.device) - pidx_t = torch.tensor(np.concatenate(all_point_idx, axis=0), dtype=gs.tc_int, device=gs.device) + nps = torch.tensor(np.concatenate(all_node_leaf_start, axis=0), dtype=gs.tc_int, device=gs.device) + npn_t = torch.tensor(np.concatenate(all_node_leaf_count, axis=0), dtype=gs.tc_int, device=gs.device) + pidx_t = torch.tensor(np.concatenate(all_leaf_elem_idx, axis=0), dtype=gs.tc_int, device=gs.device) cli = torch.tensor(new_chunk_link_idx, dtype=gs.tc_int, device=gs.device) cns = torch.tensor(new_chunk_node_start, dtype=gs.tc_int, device=gs.device) - cnn = torch.tensor(new_chunk_node_n, dtype=gs.tc_int, device=gs.device) + cnn = torch.tensor(new_chunk_node_count, dtype=gs.tc_int, device=gs.device) # Sensor index for this batch of chunks = current sensor count (the entry we're about to add). 
sensor_idx_for_chunks = int(self.sensor_chunk_start.shape[0]) csi = torch.full((len(unique_links),), sensor_idx_for_chunks, dtype=gs.tc_int, device=gs.device) @@ -349,51 +246,15 @@ def append_sensor(self, *, pc_start_row: int, idx_cat: torch.Tensor, pos_cat: to self.node_max = concat_with_tensor(self.node_max, nx, expand=(nx.shape[0], 3)) self.node_left = concat_with_tensor(self.node_left, nl, expand=(nl.shape[0],)) self.node_right = concat_with_tensor(self.node_right, nr, expand=(nr.shape[0],)) - self.node_point_start = concat_with_tensor(self.node_point_start, nps, expand=(nps.shape[0],)) - self.node_point_n = concat_with_tensor(self.node_point_n, npn_t, expand=(npn_t.shape[0],)) - self.point_idx = concat_with_tensor(self.point_idx, pidx_t, expand=(pidx_t.shape[0],)) + self.node_leaf_start = concat_with_tensor(self.node_leaf_start, nps, expand=(nps.shape[0],)) + self.node_leaf_count = concat_with_tensor(self.node_leaf_count, npn_t, expand=(npn_t.shape[0],)) + self.leaf_elem_idx = concat_with_tensor(self.leaf_elem_idx, pidx_t, expand=(pidx_t.shape[0],)) self.chunk_link_idx = concat_with_tensor(self.chunk_link_idx, cli, expand=(cli.shape[0],)) self.chunk_sensor_idx = concat_with_tensor(self.chunk_sensor_idx, csi, expand=(csi.shape[0],)) self.chunk_node_start = concat_with_tensor(self.chunk_node_start, cns, expand=(cns.shape[0],)) - self.chunk_node_n = concat_with_tensor(self.chunk_node_n, cnn, expand=(cnn.shape[0],)) + self.chunk_node_count = concat_with_tensor(self.chunk_node_count, cnn, expand=(cnn.shape[0],)) self.sensor_chunk_start = concat_with_tensor(self.sensor_chunk_start, chunk_start_for_sensor, expand=(1,)) - self.sensor_chunk_n = concat_with_tensor(self.sensor_chunk_n, len(unique_links), expand=(1,)) - - -@qd.func -def _func_vec3_at(values: qd.types.ndarray(), i: int) -> qd.types.vector(3): - return qd.Vector([values[i, 0], values[i, 1], values[i, 2]], dt=float) - - -@qd.func -def _func_sphere_intersects_aabb(center, radius_sq, bmin, bmax): # -> bool - 
"""Squared-distance sphere-vs-AABB test: returns True iff the closest point of the AABB to - ``center`` is within ``radius_sq``. Used by ProximityTaxel BVH traversal.""" - d_sq = gs.qd_float(0.0) - for k in qd.static(range(3)): - v = center[k] - lo = bmin[k] - hi = bmax[k] - if v < lo: - d = lo - v - d_sq = d_sq + d * d - elif v > hi: - d = v - hi - d_sq = d_sq + d * d - return d_sq <= radius_sq - - -@qd.func -def _func_aabb_intersects_aabb(amin, amax, bmin, bmax): # -> bool - """Standard 6-axis AABB-vs-AABB overlap test. Used by ElastomerTaxel BVH traversal.""" - return ( - amin[0] <= bmax[0] - and amax[0] >= bmin[0] - and amin[1] <= bmax[1] - and amax[1] >= bmin[1] - and amin[2] <= bmax[2] - and amax[2] >= bmin[2] - ) + self.sensor_chunk_count = concat_with_tensor(self.sensor_chunk_count, len(unique_links), expand=(1,)) @qd.kernel @@ -406,16 +267,16 @@ def _kernel_point_cloud_proximity_taxel_bvh( sensor_probe_start: qd.types.ndarray(), n_probes_per_sensor: qd.types.ndarray(), bvh_sensor_chunk_start: qd.types.ndarray(), - bvh_sensor_chunk_n: qd.types.ndarray(), + bvh_sensor_chunk_count: qd.types.ndarray(), bvh_chunk_link_idx: qd.types.ndarray(), bvh_chunk_node_start: qd.types.ndarray(), bvh_node_min: qd.types.ndarray(), bvh_node_max: qd.types.ndarray(), bvh_node_left: qd.types.ndarray(), bvh_node_right: qd.types.ndarray(), - bvh_node_point_start: qd.types.ndarray(), - bvh_node_point_n: qd.types.ndarray(), - bvh_point_idx: qd.types.ndarray(), + bvh_node_leaf_start: qd.types.ndarray(), + bvh_node_leaf_count: qd.types.ndarray(), + bvh_leaf_elem_idx: qd.types.ndarray(), pc_pos_link: qd.types.ndarray(), pc_active_envs_mask: qd.types.ndarray(), probe_radii: qd.types.ndarray(), @@ -450,10 +311,10 @@ def _kernel_point_cloud_proximity_taxel_bvh( s_ang = links_state.cd_ang[sensor_link_idx, i_b] s_com = links_state.root_COM[sensor_link_idx, i_b] - probe_local = _func_vec3_at(probe_positions_local, i_p) + probe_local = func_vec3_at(probe_positions_local, i_p) probe_world = 
s_pos + gu.qd_transform_by_quat(probe_local, s_quat) - a_loc = _func_vec3_at(probe_local_normal, i_p) + a_loc = func_vec3_at(probe_local_normal, i_p) a_w = gu.qd_transform_by_quat(a_loc, s_quat) a_norm = qd.sqrt(a_w.dot(a_w)) + eps for j in qd.static(range(3)): @@ -481,7 +342,7 @@ def _kernel_point_cloud_proximity_taxel_bvh( tau_w_m = qd.Vector.zero(gs.qd_float, 3) chunk_start = bvh_sensor_chunk_start[i_s] - n_chunks = bvh_sensor_chunk_n[i_s] + n_chunks = bvh_sensor_chunk_count[i_s] for c_off in range(n_chunks): i_c = chunk_start + c_off track_link_idx = bvh_chunk_link_idx[i_c] @@ -493,26 +354,26 @@ def _kernel_point_cloud_proximity_taxel_bvh( # BVH nodes live in tracked-link local frame: bring the probe sphere center over. probe_link = gu.qd_inv_transform_by_trans_quat(probe_world, track_pos, track_quat) - stack = qd.Vector.zero(gs.qd_int, qd.static(_POINT_CLOUD_BVH_STACK_SIZE)) + stack = qd.Vector.zero(gs.qd_int, qd.static(BVH_STACK_SIZE)) stack[0] = bvh_chunk_node_start[i_c] stack_idx = 1 while stack_idx > 0: stack_idx -= 1 n = stack[stack_idx] - bmin = _func_vec3_at(bvh_node_min, n) - bmax = _func_vec3_at(bvh_node_max, n) - if not _func_sphere_intersects_aabb(probe_link, R_query_sq, bmin, bmax): + bmin = func_vec3_at(bvh_node_min, n) + bmax = func_vec3_at(bvh_node_max, n) + if not func_sphere_intersects_aabb(probe_link, R_query_sq, bmin, bmax): continue left = bvh_node_left[n] if left == -1: - pstart = bvh_node_point_start[n] - pn = bvh_node_point_n[n] + pstart = bvh_node_leaf_start[n] + pn = bvh_node_leaf_count[n] for j in range(pn): - i_o = bvh_point_idx[pstart + j] + i_o = bvh_leaf_elem_idx[pstart + j] if not pc_active_envs_mask[i_o, i_b]: continue - pos_l = _func_vec3_at(pc_pos_link, i_o) + pos_l = func_vec3_at(pc_pos_link, i_o) d_link = pos_l - probe_link dsq = d_link.dot(d_link) dist = qd.sqrt(dsq) @@ -672,7 +533,7 @@ def build(self): self._shared_metadata.sensor_pc_n, self._shared_metadata.pc_pos_link.shape[0] - pc_start_row, expand=(1,) ) - # BVH growth 
follows pc_pos_link growth in lockstep: each leaf's point_idx is an absolute + # BVH growth follows pc_pos_link growth in lockstep: each leaf's leaf_elem_idx is an absolute # row into the just-grown pc_pos_link. self._shared_metadata.pc_bvh.append_sensor( pc_start_row=pc_start_row, @@ -814,16 +675,16 @@ def _update_current_timestep_data( shared_metadata.sensor_probe_start, shared_metadata.n_probes_per_sensor, bvh.sensor_chunk_start, - bvh.sensor_chunk_n, + bvh.sensor_chunk_count, bvh.chunk_link_idx, bvh.chunk_node_start, bvh.node_min, bvh.node_max, bvh.node_left, bvh.node_right, - bvh.node_point_start, - bvh.node_point_n, - bvh.point_idx, + bvh.node_leaf_start, + bvh.node_leaf_count, + bvh.leaf_elem_idx, shared_metadata.pc_pos_link, shared_metadata.pc_active_envs_mask, shared_metadata.probe_radii, @@ -846,8 +707,9 @@ def _draw_debug(self, context: "RasterizerContext"): self._draw_debug_probes( context, self._tactile_color_groups_fn( - lambda envs_idx: self._debug_probe_buffer_magnitudes(self._shared_metadata.taxel_signal_buf, envs_idx) - >= gs.EPS, + lambda envs_idx: ( + self._debug_probe_buffer_magnitudes(self._shared_metadata.taxel_signal_buf, envs_idx) >= gs.EPS + ), ), ) @@ -1035,7 +897,7 @@ def _kernel_elastomer_probe_depth( sensor_link_idx = links_idx[i_s] link_pos = links_state.pos[sensor_link_idx, i_b] link_quat = links_state.quat[sensor_link_idx, i_b] - probe_local = _func_vec3_at(probe_positions_local, i_p) + probe_local = func_vec3_at(probe_positions_local, i_p) probe_world = link_pos + gu.qd_transform_by_quat(probe_local, link_quat) min_sdf = _func_elastomer_min_sdf_over_active_geoms( @@ -1095,8 +957,8 @@ def _kernel_elastomer_dilate_accumulate( output[cache_start + _i_p * 3 + k, i_b] = gs.qd_float(0.0) continue - target_local = _func_vec3_at(probe_positions_local, i_p) - target_normal = _func_vec3_at(probe_local_normal, i_p) + target_local = func_vec3_at(probe_positions_local, i_p) + target_normal = func_vec3_at(probe_local_normal, i_p) acc = 
qd.Vector.zero(gs.qd_float, 3) for j in range(n_probes): @@ -1105,7 +967,7 @@ def _kernel_elastomer_dilate_accumulate( if src_depth <= gs.qd_float(0.0): continue contribution = _func_elastomer_direct_dilate_contribution( - _func_vec3_at(probe_positions_local, j_p), + func_vec3_at(probe_positions_local, j_p), target_local, target_normal, src_depth, @@ -1134,9 +996,9 @@ def _kernel_elastomer_surface_state_bvh( bvh_node_max: qd.types.ndarray(), bvh_node_left: qd.types.ndarray(), bvh_node_right: qd.types.ndarray(), - bvh_node_point_start: qd.types.ndarray(), - bvh_node_point_n: qd.types.ndarray(), - bvh_point_idx: qd.types.ndarray(), + bvh_node_leaf_start: qd.types.ndarray(), + bvh_node_leaf_count: qd.types.ndarray(), + bvh_leaf_elem_idx: qd.types.ndarray(), pc_pos_link: qd.types.ndarray(), pc_active_envs_mask: qd.types.ndarray(), sdf_enter: qd.types.ndarray(), @@ -1221,28 +1083,28 @@ def _kernel_elastomer_surface_state_bvh( sensor_pos = links_state.pos[sensor_link_idx, i_b] sensor_quat = links_state.quat[sensor_link_idx, i_b] - stack = qd.Vector.zero(gs.qd_int, qd.static(_POINT_CLOUD_BVH_STACK_SIZE)) + stack = qd.Vector.zero(gs.qd_int, qd.static(BVH_STACK_SIZE)) stack[0] = bvh_chunk_node_start[i_c] stack_idx = 1 while stack_idx > 0: stack_idx -= 1 n = stack[stack_idx] - bmin = _func_vec3_at(bvh_node_min, n) - bmax = _func_vec3_at(bvh_node_max, n) - if not _func_aabb_intersects_aabb(bmin, bmax, qmin, qmax): + bmin = func_vec3_at(bvh_node_min, n) + bmax = func_vec3_at(bvh_node_max, n) + if not func_aabb_intersects_aabb(bmin, bmax, qmin, qmax): continue left = bvh_node_left[n] if left == -1: - pstart = bvh_node_point_start[n] - pn = bvh_node_point_n[n] + pstart = bvh_node_leaf_start[n] + pn = bvh_node_leaf_count[n] for j in range(pn): - i_o = bvh_point_idx[pstart + j] + i_o = bvh_leaf_elem_idx[pstart + j] if not pc_active_envs_mask[i_o, i_b]: continue surface_candidate_buf[i_b, i_o] = True - point_link = _func_vec3_at(pc_pos_link, i_o) + point_link = 
func_vec3_at(pc_pos_link, i_o) point_world = track_pos + gu.qd_transform_by_quat(point_link, track_quat) point_sensor = gu.qd_inv_transform_by_trans_quat(point_world, sensor_pos, sensor_quat) for k in qd.static(range(3)): @@ -1289,23 +1151,24 @@ def _kernel_elastomer_shear_accumulate( sensor_cache_start: qd.types.ndarray(), sensor_probe_start: qd.types.ndarray(), sensor_pc_start: qd.types.ndarray(), - sensor_pc_n: qd.types.ndarray(), lambda_s: qd.types.ndarray(), shear_scale: qd.types.ndarray(), eps: float, surface_pos_sensor_buf: qd.types.ndarray(), surface_entry_pos_sensor_buf: qd.types.ndarray(), surface_depth_buf: qd.types.ndarray(), - surface_initialized_buf: qd.types.ndarray(), + shear_active_pc_idx: qd.types.ndarray(), + shear_active_pc_count: qd.types.ndarray(), output: qd.types.ndarray(), ): - """Target-major shear accumulator: per (env, target_probe), iterate over the sensor's pc rows - that are flagged ``surface_initialized`` and sum Gaussian contributions into a register, then - += the result into ``output``. No atomic_add (each (i_b, i_p) thread owns its output slot). - - Must run after the surface-state kernel AND after the post-kernel ``surface_initialized_buf &= candidate`` - cleanup that invalidates the flag for BVH-pruned points -- otherwise stale True flags from prior - steps would corrupt this step's accumulation. + """Target-major shear accumulator: per (env, target_probe), iterate over the sensor's compact + active surface-point index and sum Gaussian contributions into a register, then += the result + into ``output``. No atomic_add (each (i_b, i_p) thread owns its output slot). + + Consumes the compact index produced by ``_build_shear_active_pc_index`` (must run after the + surface-state kernel AND after the post-kernel ``surface_initialized_buf &= candidate`` cleanup). + Inner-loop cost is O(active_count[i_b, i_s]) rather than O(sensor_pc_n[i_s]), so the kernel scales + with contact density rather than total point-cloud size. 
""" total_n_probes = probe_positions_local.shape[0] n_batches = surface_pos_sensor_buf.shape[0] @@ -1322,15 +1185,14 @@ def _kernel_elastomer_shear_accumulate( cache_start = sensor_cache_start[i_s] _i_p = i_p - sensor_probe_start[i_s] pc_start = sensor_pc_start[i_s] - pc_end = pc_start + sensor_pc_n[i_s] + n_active = shear_active_pc_count[i_b, i_s] - probe_local = _func_vec3_at(probe_positions_local, i_p) - probe_normal = _func_vec3_at(probe_local_normal, i_p) + probe_local = func_vec3_at(probe_positions_local, i_p) + probe_normal = func_vec3_at(probe_local_normal, i_p) acc = qd.Vector.zero(gs.qd_float, 3) - for i_o in range(pc_start, pc_end): - if not surface_initialized_buf[i_b, i_o]: - continue + for j in range(n_active): + i_o = shear_active_pc_idx[i_b, pc_start + j] depth = surface_depth_buf[i_b, i_o] if depth <= eps: continue @@ -1367,6 +1229,52 @@ def _kernel_elastomer_shear_accumulate( output[cache_start + _i_p * 3 + k, i_b] = output[cache_start + _i_p * 3 + k, i_b] + acc[k] +def _build_shear_active_pc_index( + surface_initialized_buf: torch.Tensor, + sensor_pc_start: torch.Tensor, + sensor_pc_n: torch.Tensor, + shear_scale: torch.Tensor, + active_pc_idx: torch.Tensor, + active_pc_count: torch.Tensor, +) -> None: + """Build the compact per-(env, sensor) active surface-point index consumed by + ``_kernel_elastomer_shear_accumulate``. Mutates ``active_pc_idx`` and ``active_pc_count`` in place. + + For each sensor ``s`` with ``shear_scale[s] > 0``, gathers the indices of True entries in + ``surface_initialized_buf[:, pc_start[s] : pc_start[s] + pc_n[s]]`` into the per-sensor compact slice + ``active_pc_idx[:, pc_start[s] : pc_start[s] + active_count[:, s]]``; the per-(env, sensor) active + count is written to ``active_pc_count[:, s]``. Sensors with ``shear_scale == 0`` are skipped and + their count is left at zero so the kernel's outer early-exit handles them with no extra work. 
+ + Uses exclusive cumsum + ``torch.nonzero`` for the per-sensor scatter so per-env Python loops are + avoided; cost is ~O(B * total_n_surface) torch ops over the whole pass. + """ + active_pc_count.zero_() + n_sensors = sensor_pc_start.shape[0] + if n_sensors == 0: + return + # Single host sync up front so the per-sensor loop is metadata-only on the Python side. + pc_starts = sensor_pc_start.tolist() + pc_ns = sensor_pc_n.tolist() + scales = shear_scale.tolist() + idx_dtype = active_pc_idx.dtype + for s in range(n_sensors): + if scales[s] <= 0.0: + continue + pc_start = int(pc_starts[s]) + pc_n = int(pc_ns[s]) + if pc_n == 0: + continue + mask = surface_initialized_buf[:, pc_start : pc_start + pc_n] # (B, pc_n) bool + int_mask = mask.to(idx_dtype) + # Exclusive cumsum -> per-row write position within the sensor's compact slice. + write_pos = torch.cumsum(int_mask, dim=1) - int_mask + active_pc_count[:, s] = int_mask.sum(dim=1) + bs, js = torch.nonzero(mask, as_tuple=True) + if bs.numel() > 0: + active_pc_idx[bs, pc_start + write_pos[bs, js]] = (pc_start + js).to(idx_dtype) + + def _elastomer_taxel_grid_fft_dilate( grid_fft_meta: list[tuple], grid_fft_kernels_stacked: torch.Tensor, @@ -1480,6 +1388,16 @@ class ElastomerTaxelSensorMetadata( # stale surface_initialized / surface_entry_pos for points the BVH skipped this step. surface_candidate_buf: torch.Tensor = make_tensor_field((0, 0), dtype_factory=lambda: gs.tc_bool) + # Compact per-(env, sensor) active surface-point index, rebuilt every step right after the + # ``surface_initialized_buf &= candidate`` cleanup and consumed by ``_kernel_elastomer_shear_accumulate``. + # For sensor ``s`` in env ``i_b``, the first ``shear_active_pc_count[i_b, s]`` entries of + # ``shear_active_pc_idx[i_b, sensor_pc_start[s]:]`` hold the global pc-row indices whose + # ``surface_initialized_buf`` is True. 
Drops the per-probe inner-loop cost from O(sensor_pc_n[s]) to + # O(active_count[i_b, s]), so shear accumulate now scales with contact density rather than total point-cloud + # size. Sensors with ``shear_scale == 0`` have count = 0 (the kernel skips them anyway). + shear_active_pc_idx: torch.Tensor = make_tensor_field((0, 0), dtype_factory=lambda: gs.tc_int) + shear_active_pc_count: torch.Tensor = make_tensor_field((0, 0), dtype_factory=lambda: gs.tc_int) + # Per-sensor flag selecting the FFT dilation path vs the direct (non-grid) dilation kernel. use_grid_fft: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_bool) # Per-grid-FFT-sensor tangent basis, consumed by the dilation write-back. ``grid_fft_meta`` tuples for this @@ -1619,6 +1537,16 @@ def build(self): (B, total_n_surface), dtype=gs.tc_bool, device=gs.device ) + # Compact active-point index for the shear accumulator. Re-allocated on each ElastomerTaxel build so the + # ``(B, total_n_surface)`` idx buffer and ``(B, n_sensors)`` count buffer absorb the newly registered sensor. + # Both are allocated unconditionally (zero-init); the per-step build at ``_build_shear_active_pc_index`` + # leaves entries for non-shear sensors at count == 0, so unread regions remain harmless zeros. 
+ n_sensors_built = self._shared_metadata.n_probes_per_sensor.shape[0] + self._shared_metadata.shear_active_pc_idx = torch.zeros((B, total_n_surface), dtype=gs.tc_int, device=gs.device) + self._shared_metadata.shear_active_pc_count = torch.zeros( + (B, n_sensors_built), dtype=gs.tc_int, device=gs.device + ) + self._shared_metadata.use_grid_fft = concat_with_tensor( self._shared_metadata.use_grid_fft, self._use_grid_fft, expand=(1,) ) @@ -1786,9 +1714,9 @@ def _update_current_timestep_data( bvh.node_max, bvh.node_left, bvh.node_right, - bvh.node_point_start, - bvh.node_point_n, - bvh.point_idx, + bvh.node_leaf_start, + bvh.node_leaf_count, + bvh.leaf_elem_idx, shared_metadata.pc_pos_link, shared_metadata.pc_active_envs_mask, shared_metadata.elastomer_contact_sdf_enter, @@ -1806,12 +1734,21 @@ def _update_current_timestep_data( ) # Invalidate stale surface state for points the BVH did not visit. surface_initialized # and entry-pos survive across steps; depth/pos are gated by initialized downstream so - # they don't need clearing. The shear accumulator below gates on surface_initialized_buf - # -- without this step, stale True from a prior step would corrupt accumulation. + # they don't need clearing. The shear accumulator below reads from a compact index + # rebuilt from surface_initialized -- without this step, stale True from a prior step + # would inject phantom contributions. cand = shared_metadata.surface_candidate_buf shared_metadata.surface_initialized_buf &= cand # Implicit bool→float broadcast zeros entries where cand=False, no `~` allocation. 
shared_metadata.surface_entry_pos_sensor_buf.mul_(cand.unsqueeze(-1)) + _build_shear_active_pc_index( + shared_metadata.surface_initialized_buf, + shared_metadata.sensor_pc_start, + shared_metadata.sensor_pc_n, + shared_metadata.shear_scale, + shared_metadata.shear_active_pc_idx, + shared_metadata.shear_active_pc_count, + ) _kernel_elastomer_shear_accumulate( shared_metadata.probe_positions, shared_metadata.probe_local_normal, @@ -1820,14 +1757,14 @@ def _update_current_timestep_data( shared_metadata.sensor_cache_start, shared_metadata.sensor_probe_start, shared_metadata.sensor_pc_start, - shared_metadata.sensor_pc_n, shared_metadata.lambda_s, shared_metadata.shear_scale, gs.EPS, shared_metadata.surface_pos_sensor_buf, shared_metadata.surface_entry_pos_sensor_buf, shared_metadata.surface_depth_buf, - shared_metadata.surface_initialized_buf, + shared_metadata.shear_active_pc_idx, + shared_metadata.shear_active_pc_count, current_ground_truth_data_T, ) diff --git a/genesis/engine/sensors/surface_distance_probe.py b/genesis/engine/sensors/surface_distance_probe.py index 66eaa4ad5f..53abca82d1 100644 --- a/genesis/engine/sensors/surface_distance_probe.py +++ b/genesis/engine/sensors/surface_distance_probe.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import TYPE_CHECKING import numpy as np @@ -8,10 +8,8 @@ import genesis as gs import genesis.utils.array_class as array_class import genesis.utils.geom as gu -from genesis.engine.solvers.rigid.abd.forward_kinematics import func_update_all_verts from genesis.options.sensors import SurfaceDistanceProbe as SurfaceDistanceProbeOptions from genesis.utils.misc import concat_with_tensor, make_tensor_field, tensor_to_array -from genesis.utils.raycast_qd import get_triangle_vertices from .base_sensor import RigidSensorMetadataMixin, RigidSensorMixin, SimpleSensor, SimpleSensorMetadata from .probe import ( @@ -20,6 +18,15 @@ func_noised_probe_radius, get_measured_bufs, ) 
+from .tactile_shared import ( + BVH_LEAF_SIZE, + BVH_STACK_SIZE, + BVHMetadata, + build_static_chunk_bvh, + func_sphere_intersects_aabb, + func_vec3_at, + get_mesh_geom_chunks, +) if TYPE_CHECKING: from genesis.utils.ring_buffer import TensorRingBuffer @@ -88,8 +95,125 @@ def _func_closest_point_on_triangle(point: gs.qd_vec3, v0: gs.qd_vec3, v1: gs.qd return closest +@dataclass +class TriangleMeshBVH(BVHMetadata): + """ + BVH over tracked mesh triangles for one sensor class. ``leaf_elem_idx`` entries are absolute rows + into ``tri_verts``, a flat per-class table of link-local triangle vertices (shape ``(total_n_tri, + 3, 3)``: per triangle, three xyz vertex positions). See ``BVHMetadata`` for the shared scaffolding + semantics. Rigid-link assumption: built once at scene init, never rebuilt. + """ + + tri_verts: torch.Tensor = make_tensor_field((0, 3, 3)) + + def append_sensor(self, track_link_idx: np.ndarray, solver) -> None: + """ + Build per-tracked-link chunks for one sensor (link-local triangle BVH) and append into the flat + tensors. Sensors with no tracked-link geometry register zero chunks; the kernel's per-sensor + chunk loop iterates ``[0, sensor_chunk_count[i_s])`` and is a no-op for those. 
+ """ + new_chunk_link_idx: list[int] = [] + new_chunk_node_start: list[int] = [] + new_chunk_node_count: list[int] = [] + all_node_min: list[np.ndarray] = [] + all_node_max: list[np.ndarray] = [] + all_node_left: list[np.ndarray] = [] + all_node_right: list[np.ndarray] = [] + all_node_leaf_start: list[np.ndarray] = [] + all_node_leaf_count: list[np.ndarray] = [] + all_leaf_elem_idx: list[np.ndarray] = [] + all_tri_verts: list[np.ndarray] = [] + + chunk_start_for_sensor = int(self.chunk_link_idx.shape[0]) + node_offset = int(self.node_min.shape[0]) + leaf_offset = int(self.leaf_elem_idx.shape[0]) + tri_offset = int(self.tri_verts.shape[0]) + + for i_l in range(int(track_link_idx.shape[0])): + link_idx = int(track_link_idx[i_l]) + link = solver.links[link_idx] + geom_chunks = get_mesh_geom_chunks(link, prefer_visual=False) + if not geom_chunks: + continue + # Concatenate triangles from all geoms of this link into one chunk. + tri_v0_list: list[np.ndarray] = [] + tri_v1_list: list[np.ndarray] = [] + tri_v2_list: list[np.ndarray] = [] + for _geom, verts_link, faces in geom_chunks: + tri_v0_list.append(verts_link[faces[:, 0]]) + tri_v1_list.append(verts_link[faces[:, 1]]) + tri_v2_list.append(verts_link[faces[:, 2]]) + v0 = np.concatenate(tri_v0_list, axis=0).astype(np.float32, copy=False) + v1 = np.concatenate(tri_v1_list, axis=0).astype(np.float32, copy=False) + v2 = np.concatenate(tri_v2_list, axis=0).astype(np.float32, copy=False) + n_tri = int(v0.shape[0]) + if n_tri == 0: + continue + + centroids = (v0 + v1 + v2) / 3.0 + aabb_mins = np.minimum(np.minimum(v0, v1), v2) + aabb_maxs = np.maximum(np.maximum(v0, v1), v2) + + tri_stack = np.stack((v0, v1, v2), axis=1) # (n_tri, 3, 3) + global_rows = (tri_offset + np.arange(n_tri, dtype=np.int32)).astype(np.int32) + + nmin, nmax, nleft, nright, lstart, lcount, eidx = build_static_chunk_bvh( + centroids, aabb_mins, aabb_maxs, global_rows, BVH_LEAF_SIZE + ) + + new_chunk_link_idx.append(link_idx) + 
new_chunk_node_start.append(node_offset) + new_chunk_node_count.append(int(nmin.shape[0])) + + all_node_min.append(nmin) + all_node_max.append(nmax) + # Rebase intra-chunk child / leaf-start indices into the flat tensors' absolute space. + all_node_left.append(np.where(nleft >= 0, nleft + node_offset, nleft).astype(np.int32)) + all_node_right.append(np.where(nright >= 0, nright + node_offset, nright).astype(np.int32)) + all_node_leaf_start.append(np.where(lcount > 0, lstart + leaf_offset, lstart).astype(np.int32)) + all_node_leaf_count.append(lcount) + all_leaf_elem_idx.append(eidx) + all_tri_verts.append(tri_stack.astype(np.float32, copy=False)) + + node_offset += int(nmin.shape[0]) + leaf_offset += int(eidx.shape[0]) + tri_offset += n_tri + + if not new_chunk_link_idx: + # No tracked links contributed geometry; record zero chunks for this sensor. + self.sensor_chunk_start = concat_with_tensor(self.sensor_chunk_start, chunk_start_for_sensor, expand=(1,)) + self.sensor_chunk_count = concat_with_tensor(self.sensor_chunk_count, 0, expand=(1,)) + return + + nm = torch.tensor(np.concatenate(all_node_min, axis=0), dtype=gs.tc_float, device=gs.device) + nx = torch.tensor(np.concatenate(all_node_max, axis=0), dtype=gs.tc_float, device=gs.device) + nl = torch.tensor(np.concatenate(all_node_left, axis=0), dtype=gs.tc_int, device=gs.device) + nr = torch.tensor(np.concatenate(all_node_right, axis=0), dtype=gs.tc_int, device=gs.device) + lst = torch.tensor(np.concatenate(all_node_leaf_start, axis=0), dtype=gs.tc_int, device=gs.device) + lct = torch.tensor(np.concatenate(all_node_leaf_count, axis=0), dtype=gs.tc_int, device=gs.device) + eidx_t = torch.tensor(np.concatenate(all_leaf_elem_idx, axis=0), dtype=gs.tc_int, device=gs.device) + tv = torch.tensor(np.concatenate(all_tri_verts, axis=0), dtype=gs.tc_float, device=gs.device) + cli = torch.tensor(new_chunk_link_idx, dtype=gs.tc_int, device=gs.device) + cns = torch.tensor(new_chunk_node_start, dtype=gs.tc_int, 
device=gs.device) + cnc = torch.tensor(new_chunk_node_count, dtype=gs.tc_int, device=gs.device) + + self.node_min = concat_with_tensor(self.node_min, nm, expand=(nm.shape[0], 3)) + self.node_max = concat_with_tensor(self.node_max, nx, expand=(nx.shape[0], 3)) + self.node_left = concat_with_tensor(self.node_left, nl, expand=(nl.shape[0],)) + self.node_right = concat_with_tensor(self.node_right, nr, expand=(nr.shape[0],)) + self.node_leaf_start = concat_with_tensor(self.node_leaf_start, lst, expand=(lst.shape[0],)) + self.node_leaf_count = concat_with_tensor(self.node_leaf_count, lct, expand=(lct.shape[0],)) + self.leaf_elem_idx = concat_with_tensor(self.leaf_elem_idx, eidx_t, expand=(eidx_t.shape[0],)) + self.tri_verts = concat_with_tensor(self.tri_verts, tv, expand=(tv.shape[0], 3, 3)) + self.chunk_link_idx = concat_with_tensor(self.chunk_link_idx, cli, expand=(cli.shape[0],)) + self.chunk_node_start = concat_with_tensor(self.chunk_node_start, cns, expand=(cns.shape[0],)) + self.chunk_node_count = concat_with_tensor(self.chunk_node_count, cnc, expand=(cnc.shape[0],)) + self.sensor_chunk_start = concat_with_tensor(self.sensor_chunk_start, chunk_start_for_sensor, expand=(1,)) + self.sensor_chunk_count = concat_with_tensor(self.sensor_chunk_count, len(new_chunk_link_idx), expand=(1,)) + + @qd.kernel -def _kernel_surface_distance_probe( +def _kernel_surface_distance_probe_bvh( probe_positions_local: qd.types.ndarray(), probe_radii: qd.types.ndarray(), probe_radii_noise: qd.types.ndarray(), @@ -97,44 +221,48 @@ def _kernel_surface_distance_probe( links_idx: qd.types.ndarray(), sensor_cache_start: qd.types.ndarray(), sensor_probe_start: qd.types.ndarray(), - track_link_start: qd.types.ndarray(), - track_link_end: qd.types.ndarray(), - track_link_flat: qd.types.ndarray(), - static_rigid_sim_config: qd.template(), + bvh_sensor_chunk_start: qd.types.ndarray(), + bvh_sensor_chunk_count: qd.types.ndarray(), + bvh_chunk_link_idx: qd.types.ndarray(), + bvh_chunk_node_start: 
qd.types.ndarray(), + bvh_node_min: qd.types.ndarray(), + bvh_node_max: qd.types.ndarray(), + bvh_node_left: qd.types.ndarray(), + bvh_node_right: qd.types.ndarray(), + bvh_node_leaf_start: qd.types.ndarray(), + bvh_node_leaf_count: qd.types.ndarray(), + bvh_leaf_elem_idx: qd.types.ndarray(), + bvh_tri_verts: qd.types.ndarray(), links_state: array_class.LinksState, - links_info: array_class.LinksInfo, - geoms_info: array_class.GeomsInfo, - geoms_state: array_class.GeomsState, - faces_info: array_class.FacesInfo, - verts_info: array_class.VertsInfo, - fixed_verts_state: array_class.VertsState, - free_verts_state: array_class.VertsState, positions_gt: qd.types.ndarray(), positions_measured: qd.types.ndarray(), output_gt: qd.types.ndarray(), output_measured: qd.types.ndarray(), ): + """ + BVH-accelerated surface-distance query. + + Per ``(probe, env)``: transform the probe into each tracked-link's local frame, traverse the + per-(sensor, tracked-link) static BVH with a fixed-depth stack, cull nodes via sphere-vs-AABB with + radius squared = current best (the larger of GT / measured branch), and on leaf nodes call + closest-point-on-triangle against the stored link-local vertices. The closest world-frame point is + written to ``positions_*`` and the distance to ``output_*``. 
+ """ total_n_probes = probe_positions_local.shape[0] n_batches = output_gt.shape[-1] - func_update_all_verts( - geoms_state, geoms_info, verts_info, free_verts_state, fixed_verts_state, static_rigid_sim_config - ) - for i_p, i_b in qd.ndrange(total_n_probes, n_batches): i_s = probe_sensor_idx[i_p] sensor_link_idx = links_idx[i_s] link_pos = links_state.pos[sensor_link_idx, i_b] link_quat = links_state.quat[sensor_link_idx, i_b] - probe_pos_local = qd.Vector( - [probe_positions_local[i_p, 0], probe_positions_local[i_p, 1], probe_positions_local[i_p, 2]] - ) - probe_pos = link_pos + gu.qd_transform_by_quat(probe_pos_local, link_quat) + probe_local = func_vec3_at(probe_positions_local, i_p) + probe_world = link_pos + gu.qd_transform_by_quat(probe_local, link_quat) max_r_gt = probe_radii[i_p] best_dist_sq_gt = max_r_gt * max_r_gt - best_point_gt = probe_pos + best_point_gt = probe_world probe_radius_noise = probe_radii_noise[i_p] use_noised_radius = probe_radius_noise > gs.EPS @@ -142,37 +270,67 @@ def _kernel_surface_distance_probe( if use_noised_radius: max_r_m = func_noised_probe_radius(max_r_gt, probe_radius_noise) best_dist_sq_m = max_r_m * max_r_m - best_point_m = probe_pos - - start = track_link_start[i_s] - end = track_link_end[i_s] - - for k in range(start, end): - i_l = track_link_flat[k] - I_l = [i_l, i_b] if qd.static(static_rigid_sim_config.batch_links_info) else i_l - geom_start = links_info.geom_start[I_l] - geom_end = links_info.geom_end[I_l] - - for i_g in range(geom_start, geom_end): - face_start = geoms_info.face_start[i_g] - face_end = geoms_info.face_end[i_g] - - for i_f in range(face_start, face_end): - tri_verts = get_triangle_vertices( - i_f, i_b, faces_info, verts_info, fixed_verts_state, free_verts_state - ) - v0 = tri_verts[:, 0] - v1 = tri_verts[:, 1] - v2 = tri_verts[:, 2] - closest = _func_closest_point_on_triangle(probe_pos, v0, v1, v2) - diff = closest - probe_pos - dist_sq = diff.dot(diff) - if dist_sq < best_dist_sq_gt: - 
best_dist_sq_gt = dist_sq - best_point_gt = closest - if use_noised_radius and dist_sq < best_dist_sq_m: - best_dist_sq_m = dist_sq - best_point_m = closest + best_point_m = probe_world + + chunk_start = bvh_sensor_chunk_start[i_s] + n_chunks = bvh_sensor_chunk_count[i_s] + for c_off in range(n_chunks): + i_c = chunk_start + c_off + track_link_idx = bvh_chunk_link_idx[i_c] + track_pos = links_state.pos[track_link_idx, i_b] + track_quat = links_state.quat[track_link_idx, i_b] + # BVH lives in the tracked link's local frame; bring the probe over. + probe_link = gu.qd_inv_transform_by_trans_quat(probe_world, track_pos, track_quat) + + stack = qd.Vector.zero(gs.qd_int, qd.static(BVH_STACK_SIZE)) + stack[0] = bvh_chunk_node_start[i_c] + stack_idx = 1 + + while stack_idx > 0: + stack_idx -= 1 + n = stack[stack_idx] + bmin = func_vec3_at(bvh_node_min, n) + bmax = func_vec3_at(bvh_node_max, n) + # Cull when min distance from probe to AABB exceeds the conservative current best. + cull_radius_sq = qd.max(best_dist_sq_gt, best_dist_sq_m) + if not func_sphere_intersects_aabb(probe_link, cull_radius_sq, bmin, bmax): + continue + left = bvh_node_left[n] + if left == -1: + fstart = bvh_node_leaf_start[n] + fn = bvh_node_leaf_count[n] + for j in range(fn): + i_f = bvh_leaf_elem_idx[fstart + j] + v0 = qd.Vector( + [bvh_tri_verts[i_f, 0, 0], bvh_tri_verts[i_f, 0, 1], bvh_tri_verts[i_f, 0, 2]], + dt=gs.qd_float, + ) + v1 = qd.Vector( + [bvh_tri_verts[i_f, 1, 0], bvh_tri_verts[i_f, 1, 1], bvh_tri_verts[i_f, 1, 2]], + dt=gs.qd_float, + ) + v2 = qd.Vector( + [bvh_tri_verts[i_f, 2, 0], bvh_tri_verts[i_f, 2, 1], bvh_tri_verts[i_f, 2, 2]], + dt=gs.qd_float, + ) + closest_link = _func_closest_point_on_triangle(probe_link, v0, v1, v2) + diff = closest_link - probe_link + dist_sq = diff.dot(diff) + if dist_sq < best_dist_sq_gt or (use_noised_radius and dist_sq < best_dist_sq_m): + # Transform the hit back to world frame and record on whichever branch tightened. 
+ closest_world = track_pos + gu.qd_transform_by_quat(closest_link, track_quat) + if dist_sq < best_dist_sq_gt: + best_dist_sq_gt = dist_sq + best_point_gt = closest_world + if use_noised_radius and dist_sq < best_dist_sq_m: + best_dist_sq_m = dist_sq + best_point_m = closest_world + else: + right = bvh_node_right[n] + stack[stack_idx] = left + stack_idx += 1 + stack[stack_idx] = right + stack_idx += 1 best_dist_gt = qd.sqrt(best_dist_sq_gt) best_dist_m = best_dist_gt @@ -194,13 +352,17 @@ def _kernel_surface_distance_probe( @dataclass class SurfaceDistanceProbeSensorMetadataMixin(ProbeSensorMetadataMixin): - """Shared metadata for surface distance probe sensors: tracked links and nearest-point buffer.""" + """ + Shared metadata for surface distance probe sensors: tracked-link bookkeeping, nearest-point buffer, + and the per-class static triangle-mesh BVH consumed by ``_kernel_surface_distance_probe_bvh``. + """ track_link_start: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) track_link_end: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) track_link_flat: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) nearest_positions: torch.Tensor = make_tensor_field((0, 0, 3)) nearest_positions_measured: torch.Tensor = make_tensor_field((0, 0, 3)) + bvh: TriangleMeshBVH = field(default_factory=TriangleMeshBVH) @dataclass @@ -258,6 +420,10 @@ def build(self): slice_start = self._shared_metadata.sensor_probe_start[self._idx] self._nearest_points_slice = slice(slice_start, slice_start + self._n_probes) + # Build the per-(sensor, tracked-link) triangle BVH in link-local frame. Rigid links don't deform, + # so this is a one-shot scene-build cost; per-step queries traverse the static structure. 
+ self._shared_metadata.bvh.append_sensor(track_link_idx, self._shared_metadata.solver) + @classmethod def reset(cls, shared_metadata: SurfaceDistanceProbeMetadata, shared_ground_truth_cache: torch.Tensor, envs_idx): super().reset(shared_metadata, shared_ground_truth_cache, envs_idx) @@ -278,7 +444,8 @@ def _update_current_timestep_data( measured, measured_cols_b = get_measured_bufs( shared_metadata, current_ground_truth_data_T, measured_data_timeline ) - _kernel_surface_distance_probe( + bvh = shared_metadata.bvh + _kernel_surface_distance_probe_bvh( shared_metadata.probe_positions, shared_metadata.probe_radii, shared_metadata.probe_radii_noise, @@ -286,18 +453,19 @@ def _update_current_timestep_data( shared_metadata.links_idx, shared_metadata.sensor_cache_start, shared_metadata.sensor_probe_start, - shared_metadata.track_link_start, - shared_metadata.track_link_end, - shared_metadata.track_link_flat, - solver._static_rigid_sim_config, + bvh.sensor_chunk_start, + bvh.sensor_chunk_count, + bvh.chunk_link_idx, + bvh.chunk_node_start, + bvh.node_min, + bvh.node_max, + bvh.node_left, + bvh.node_right, + bvh.node_leaf_start, + bvh.node_leaf_count, + bvh.leaf_elem_idx, + bvh.tri_verts, solver.links_state, - solver.links_info, - solver.geoms_info, - solver.geoms_state, - solver.faces_info, - solver.verts_info, - solver.fixed_verts_state, - solver.free_verts_state, shared_metadata.nearest_positions, shared_metadata.nearest_positions_measured, current_ground_truth_data_T, diff --git a/genesis/engine/sensors/tactile_shared.py b/genesis/engine/sensors/tactile_shared.py index df430a8ac4..3c8866fa50 100644 --- a/genesis/engine/sensors/tactile_shared.py +++ b/genesis/engine/sensors/tactile_shared.py @@ -3,9 +3,11 @@ from typing import TYPE_CHECKING, Callable, Generic, TypeVar import numpy as np +import quadrants as qd import torch import genesis as gs +import genesis.utils.geom as gu from genesis.utils.misc import concat_with_tensor, make_tensor_field if TYPE_CHECKING: @@ -16,7 
+18,9 @@ def next_pow2(n: int) -> int: - """Smallest power of 2 >= ``n`` (1 if ``n == 0``).""" + """ + Smallest power of 2 >= ``n`` (1 if ``n == 0``). + """ if n <= 1: return 1 p = 1 @@ -25,6 +29,202 @@ def next_pow2(n: int) -> int: return p +# ============================ BVH helpers (shared by point-cloud and triangle-mesh sensors) ============================ + + +BVH_LEAF_SIZE = 8 +BVH_STACK_SIZE = 32 + + +def get_mesh_geom_chunks(link, prefer_visual: bool) -> list[tuple[object, np.ndarray, np.ndarray]]: + """ + Return per-geom mesh chunks ``(geom, verts_link, faces)`` in link-local frame. + + ``prefer_visual`` picks vgeoms over geoms when both exist; falls back to the other type when the + preferred one is absent. Empty meshes are dropped from the list. + """ + if prefer_visual: + geoms = list(link.vgeoms) if link.vgeoms else list(link.geoms) + use_vverts = bool(link.vgeoms) + else: + geoms = list(link.geoms) if link.geoms else list(link.vgeoms) + use_vverts = not bool(link.geoms) and bool(link.vgeoms) + + chunks: list[tuple[object, np.ndarray, np.ndarray]] = [] + for geom in geoms: + if use_vverts: + verts = np.asarray(geom.init_vverts, dtype=np.float32) + faces = np.asarray(geom.init_vfaces, dtype=np.int32) + else: + verts = np.asarray(geom.init_verts, dtype=np.float32) + faces = np.asarray(geom.init_faces, dtype=np.int32) + if verts.size == 0 or faces.size == 0: + continue + verts_link = gu.transform_by_trans_quat(verts, geom.init_pos, geom.init_quat) + chunks.append((geom, verts_link.astype(np.float32, copy=False), np.asarray(faces, dtype=np.int32))) + return chunks + + +def build_static_chunk_bvh( + centroids: np.ndarray, + aabb_mins: np.ndarray, + aabb_maxs: np.ndarray, + global_rows: np.ndarray, + leaf_size: int, +) -> tuple[np.ndarray, ...]: + """ + Median-split AABB BVH over a static set of elements (points, triangles, etc.) in link-local frame. 
+ + Split decisions use ``centroids`` along the longest-spread axis; node AABBs union the per-element + ``aabb_mins``/``aabb_maxs``. For point-cloud BVHs, callers pass ``centroids == aabb_mins == aabb_maxs`` + (the points themselves); for triangle BVHs, callers pass per-triangle centroid + min/max bounds. + + Leaves carry the caller-provided ``global_rows`` (absolute rows into the sensor-class element table); + the kernel indexes directly into that table with no extra indirection. Internal nodes use -1 for + ``node_left`` / ``node_right``. Returns ``(node_min, node_max, node_left, node_right, node_elem_start, + node_elem_n, elem_idx)``. + """ + node_min: list[np.ndarray] = [] + node_max: list[np.ndarray] = [] + node_left: list[int] = [] + node_right: list[int] = [] + node_elem_start: list[int] = [] + node_elem_n: list[int] = [] + elem_idx: list[int] = [] + + def _alloc() -> int: + i = len(node_min) + node_min.append(np.zeros(3, dtype=np.float32)) + node_max.append(np.zeros(3, dtype=np.float32)) + node_left.append(-1) + node_right.append(-1) + node_elem_start.append(-1) + node_elem_n.append(0) + return i + + def _build(rows: np.ndarray, cents: np.ndarray, a_mins: np.ndarray, a_maxs: np.ndarray) -> int: + nid = _alloc() + bmin = a_mins.min(axis=0).astype(np.float32) + bmax = a_maxs.max(axis=0).astype(np.float32) + node_min[nid] = bmin + node_max[nid] = bmax + if rows.shape[0] <= leaf_size: + start = len(elem_idx) + elem_idx.extend(int(r) for r in rows) + node_elem_start[nid] = start + node_elem_n[nid] = int(rows.shape[0]) + return nid + axis = int(np.argmax(bmax - bmin)) + order = np.argsort(cents[:, axis], kind="stable") + mid = order.shape[0] // 2 + node_left[nid] = _build(rows[order[:mid]], cents[order[:mid]], a_mins[order[:mid]], a_maxs[order[:mid]]) + node_right[nid] = _build(rows[order[mid:]], cents[order[mid:]], a_mins[order[mid:]], a_maxs[order[mid:]]) + return nid + + if centroids.shape[0] == 0: + return ( + np.zeros((0, 3), dtype=np.float32), + np.zeros((0, 
3), dtype=np.float32), + np.zeros((0,), dtype=np.int32), + np.zeros((0,), dtype=np.int32), + np.zeros((0,), dtype=np.int32), + np.zeros((0,), dtype=np.int32), + np.zeros((0,), dtype=np.int32), + ) + + root = _build( + global_rows.astype(np.int32, copy=False), + centroids.astype(np.float32, copy=False), + aabb_mins.astype(np.float32, copy=False), + aabb_maxs.astype(np.float32, copy=False), + ) + assert root == 0 + return ( + np.stack(node_min, axis=0), + np.stack(node_max, axis=0), + np.asarray(node_left, dtype=np.int32), + np.asarray(node_right, dtype=np.int32), + np.asarray(node_elem_start, dtype=np.int32), + np.asarray(node_elem_n, dtype=np.int32), + np.asarray(elem_idx, dtype=np.int32), + ) + + +@qd.func +def func_vec3_at(values: qd.types.ndarray(), i: int) -> qd.types.vector(3): + return qd.Vector([values[i, 0], values[i, 1], values[i, 2]], dt=float) + + +@qd.func +def func_sphere_intersects_aabb(center, radius_sq, bmin, bmax): # -> bool + """ + Squared-distance sphere-vs-AABB test: True iff the closest AABB point to ``center`` is within + ``radius_sq``. Reused as a closest-point cull by passing ``radius_sq = current_best_dist_sq``. + """ + d_sq = gs.qd_float(0.0) + for k in qd.static(range(3)): + v = center[k] + lo = bmin[k] + hi = bmax[k] + if v < lo: + d = lo - v + d_sq = d_sq + d * d + elif v > hi: + d = v - hi + d_sq = d_sq + d * d + return d_sq <= radius_sq + + +@qd.func +def func_aabb_intersects_aabb(amin, amax, bmin, bmax): # -> bool + """ + Standard 6-axis AABB-vs-AABB overlap test. + """ + return ( + amin[0] <= bmax[0] + and amax[0] >= bmin[0] + and amin[1] <= bmax[1] + and amax[1] >= bmin[1] + and amin[2] <= bmax[2] + and amax[2] >= bmin[2] + ) + + +@dataclass +class BVHMetadata: + """ + Element-agnostic scaffolding for a static, link-local, chunked AABB BVH shared across one sensor class. + + One *chunk* per (sensor, tracked_link): each chunk is a small subtree built once at scene init in the + tracked link's local frame and never rebuilt. 
Subclasses (PointCloudBVH, TriangleMeshBVH) layer on + element-specific payload tables; ``leaf_elem_idx`` entries are absolute rows into those tables. + + Per-sensor slice into the chunk arrays: + ``chunks[sensor_chunk_start[s] : sensor_chunk_start[s] + sensor_chunk_count[s]]`` + Per-chunk slice into the flat node arrays: + ``nodes[chunk_node_start[c] : chunk_node_start[c] + chunk_node_count[c]]`` + Per-leaf slice into ``leaf_elem_idx``: + ``leaf_elem_idx[node_leaf_start[n] : node_leaf_start[n] + node_leaf_count[n]]`` + ``node_left == -1`` marks a leaf; otherwise ``node_left``/``node_right`` are absolute child indices. + """ + + sensor_chunk_start: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) + sensor_chunk_count: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) + + chunk_link_idx: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) + chunk_node_start: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) + chunk_node_count: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) + + node_min: torch.Tensor = make_tensor_field((0, 3)) + node_max: torch.Tensor = make_tensor_field((0, 3)) + node_left: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) + node_right: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) + + node_leaf_start: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) + node_leaf_count: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) + leaf_elem_idx: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) + + # ============================ FFT helpers ============================ @@ -213,6 +413,26 @@ def normalize_grid_probe_layout( ) +# ============================ Contact prefilter ============================ + + +@dataclass +class ContactPrefilterMetadataMixin: + """ + Per-(env, sensor) compact contact list shared by tactile sensors whose 
kernels iterate the collider's + contact list per probe (KinematicTaxel, ContactDepthProbe). Populated each step by + ``_kernel_build_sensor_contact_idx`` in ``kinematic_tactile.py``; the main kernel's per-probe loop + then touches only contacts whose ``link_a`` or ``link_b`` matches the sensor's tracked link. + + Shape: ``sensor_contacts_idx (B, n_sensors, max_per_sensor)``, ``sensor_n_contacts (B, n_sensors)``. + The per-sensor cap is held by the consuming class (see ``_MAX_CONTACTS_PER_SENSOR``); the kernel + reads it off ``sensor_contacts_idx.shape[2]``. + """ + + sensor_contacts_idx: torch.Tensor = make_tensor_field((0, 0, 0), dtype_factory=lambda: gs.tc_int) + sensor_n_contacts: torch.Tensor = make_tensor_field((0, 0), dtype_factory=lambda: gs.tc_int) + + # ============================ ViscoelasticHysteresis ============================ From 106f72009376f00cf6c85c64b891b5bd382ad005 Mon Sep 17 00:00:00 2001 From: Trinity Chung Date: Wed, 27 May 2026 23:45:33 -0400 Subject: [PATCH 6/7] add option to switch raycast or sdf for contact depth query --- genesis/engine/sensors/kinematic_tactile.py | 655 ++++++++++++++++-- genesis/engine/sensors/point_cloud_tactile.py | 549 +++++++++++++-- genesis/engine/sensors/raycaster.py | 120 ++-- genesis/engine/sensors/tactile_shared.py | 25 + genesis/options/sensors/options.py | 8 + genesis/options/sensors/tactile.py | 24 +- genesis/utils/raycast_qd.py | 74 +- tests/test_sensors.py | 88 +++ 8 files changed, 1379 insertions(+), 164 deletions(-) diff --git a/genesis/engine/sensors/kinematic_tactile.py b/genesis/engine/sensors/kinematic_tactile.py index 41ea478b9b..456573eeb7 100644 --- a/genesis/engine/sensors/kinematic_tactile.py +++ b/genesis/engine/sensors/kinematic_tactile.py @@ -11,11 +11,18 @@ import genesis.utils.array_class as array_class import genesis.utils.geom as gu import genesis.utils.sdf as sdf +from genesis.engine.bvh import STACK_SIZE as _BVH_STACK_SIZE from genesis.engine.solvers.rigid.collider.utils import 
func_point_in_geom_aabb from genesis.options.sensors import ContactDepthProbe as ContactDepthProbeOptions from genesis.options.sensors import ContactProbe as ContactProbeOptions from genesis.options.sensors import KinematicTaxel as KinematicTaxelOptions from genesis.utils.misc import concat_with_tensor, make_tensor_field, tensor_to_array +from genesis.utils.raycast_qd import ( + closest_point_on_triangle, + get_triangle_vertices, + ray_triangle_intersection, + triangle_face_normal, +) from .base_sensor import RigidSensorMetadataMixin, RigidSensorMixin, SimpleSensor, SimpleSensorMetadata from .probe import ( @@ -28,9 +35,11 @@ get_measured_bufs, ) from .tactile_shared import ( + ContactDepthQueryMetadataMixin, ContactPrefilterMetadataMixin, ViscoelasticHysteresisMetadataMixin, ViscoelasticHysteresisMixin, + func_sphere_intersects_aabb, normalize_grid_probe_layout, ) @@ -464,17 +473,483 @@ def _kernel_contact_depth_probe( output_measured[cache_idx, i_b] = max_penetration_m +# ============================ Raycast / BVH contact-depth path ============================ + + +@qd.kernel +def _kernel_build_sensor_candidate_geom_mask( + sensor_contacts_idx: qd.types.ndarray(), + sensor_n_contacts: qd.types.ndarray(), + collider_state: array_class.ColliderState, + sensor_candidate_geom_mask: qd.types.ndarray(), +): + """ + Scatter the per-(env, sensor) candidate-geom bitmask from the prefiltered contact list. Run only when the + sensor class is in ``contact_depth_query="raycast"`` mode; the BVH leaf loop consults this mask to skip + triangles whose owning geom isn't in the sensor's current contact list. 
+ """ + n_batches = sensor_n_contacts.shape[0] + n_sensors = sensor_n_contacts.shape[1] + n_geoms = sensor_candidate_geom_mask.shape[2] + for i_b, i_s in qd.ndrange(n_batches, n_sensors): + for i_g in range(n_geoms): + sensor_candidate_geom_mask[i_b, i_s, i_g] = False + n_c = sensor_n_contacts[i_b, i_s] + for k in range(n_c): + i_c = sensor_contacts_idx[i_b, i_s, k] + sensor_candidate_geom_mask[i_b, i_s, collider_state.contact_data.geom_a[i_c, i_b]] = True + sensor_candidate_geom_mask[i_b, i_s, collider_state.contact_data.geom_b[i_c, i_b]] = True + + +@qd.func +def _func_query_contact_depth_penetration_bvh( + i_b: int, + i_s: int, + probe_pos: qd.types.vector(3), + probe_normal: qd.types.vector(3), + probe_radius_gt: float, + probe_radius_m: float, + bvh_nodes: qd.template(), + bvh_morton_codes: qd.template(), + faces_info: array_class.FacesInfo, + verts_info: array_class.VertsInfo, + fixed_verts_state: array_class.VertsState, + free_verts_state: array_class.VertsState, + sensor_candidate_geom_mask: qd.types.ndarray(), + eps: float, +): + """ + BVH-based dual-radius probe penetration: at each candidate leaf, combines a sphere/triangle closest-point + test (``pen = R - dist``) with a raycast along ``-probe_normal`` (``pen = R - hit_distance``). Max wins per + triangle and across all visited triangles. Mirrors ``_func_query_contact_depth_penetration``'s return shape + so callers can dispatch without further branching. 
+ """ + n_triangles = faces_info.verts_idx.shape[0] + R_query = qd.max(probe_radius_gt, probe_radius_m) + R_query_sq = R_query * R_query + neg_normal = -probe_normal + + max_pen_gt = gs.qd_float(0.0) + max_pen_m = gs.qd_float(0.0) + + node_stack = qd.Vector.zero(gs.qd_int, qd.static(_BVH_STACK_SIZE)) + node_stack[0] = 0 + stack_idx = 1 + + while stack_idx > 0: + stack_idx -= 1 + node_idx = node_stack[stack_idx] + node = bvh_nodes[i_b, node_idx] + + if not func_sphere_intersects_aabb(probe_pos, R_query_sq, node.bound.min, node.bound.max): + continue + + if node.left == -1: + sorted_leaf_idx = node_idx - (n_triangles - 1) + i_f = qd.cast(bvh_morton_codes[i_b, sorted_leaf_idx][1], gs.qd_int) + i_g = faces_info.geom_idx[i_f] + if not sensor_candidate_geom_mask[i_b, i_s, i_g]: + continue + + tri = get_triangle_vertices(i_f, i_b, faces_info, verts_info, fixed_verts_state, free_verts_state) + v0 = tri[:, 0] + v1 = tri[:, 1] + v2 = tri[:, 2] + + closest = closest_point_on_triangle(probe_pos, v0, v1, v2) + diff = closest - probe_pos + dist = qd.sqrt(diff.dot(diff)) + pen_gt = probe_radius_gt - dist + if pen_gt > max_pen_gt: + max_pen_gt = pen_gt + pen_m = probe_radius_m - dist + if pen_m > max_pen_m: + max_pen_m = pen_m + + hit = ray_triangle_intersection(probe_pos, neg_normal, v0, v1, v2, eps) + if hit.w > 0.5: + if hit.x <= probe_radius_gt: + pen_ray_gt = probe_radius_gt - hit.x + if pen_ray_gt > max_pen_gt: + max_pen_gt = pen_ray_gt + if hit.x <= probe_radius_m: + pen_ray_m = probe_radius_m - hit.x + if pen_ray_m > max_pen_m: + max_pen_m = pen_ray_m + else: + if stack_idx < qd.static(_BVH_STACK_SIZE - 2): + node_stack[stack_idx] = node.left + node_stack[stack_idx + 1] = node.right + stack_idx += 2 + + return max_pen_gt, max_pen_m + + +@qd.func +def _func_query_contact_depth_bvh( + i_b: int, + i_s: int, + probe_pos: qd.types.vector(3), + probe_normal: qd.types.vector(3), + probe_radius_gt: float, + probe_radius_m: float, + bvh_nodes: qd.template(), + bvh_morton_codes: 
qd.template(), + faces_info: array_class.FacesInfo, + verts_info: array_class.VertsInfo, + fixed_verts_state: array_class.VertsState, + free_verts_state: array_class.VertsState, + geoms_info: array_class.GeomsInfo, + sensor_candidate_geom_mask: qd.types.ndarray(), + eps: float, +): + """ + BVH-based dual-radius probe query with contact normal and link, mirroring ``_func_query_contact_depth``'s return. + The normal returned is the triangle face normal of the deepest-penetrating triangle (raycast or sphere subtest). + """ + n_triangles = faces_info.verts_idx.shape[0] + R_query = qd.max(probe_radius_gt, probe_radius_m) + R_query_sq = R_query * R_query + radius_gt_sq = probe_radius_gt * probe_radius_gt + radius_m_sq = probe_radius_m * probe_radius_m + neg_normal = -probe_normal + + max_pen_gt = gs.qd_float(0.0) + contact_link_gt = gs.qd_int(-1) + contact_normal_gt = qd.Vector.zero(gs.qd_float, 3) + max_pen_m = gs.qd_float(0.0) + contact_link_m = gs.qd_int(-1) + contact_normal_m = qd.Vector.zero(gs.qd_float, 3) + + node_stack = qd.Vector.zero(gs.qd_int, qd.static(_BVH_STACK_SIZE)) + node_stack[0] = 0 + stack_idx = 1 + + while stack_idx > 0: + stack_idx -= 1 + node_idx = node_stack[stack_idx] + node = bvh_nodes[i_b, node_idx] + + if not func_sphere_intersects_aabb(probe_pos, R_query_sq, node.bound.min, node.bound.max): + continue + + if node.left == -1: + sorted_leaf_idx = node_idx - (n_triangles - 1) + i_f = qd.cast(bvh_morton_codes[i_b, sorted_leaf_idx][1], gs.qd_int) + i_g = faces_info.geom_idx[i_f] + if not sensor_candidate_geom_mask[i_b, i_s, i_g]: + continue + + link_for_geom = geoms_info.link_idx[i_g] + + tri = get_triangle_vertices(i_f, i_b, faces_info, verts_info, fixed_verts_state, free_verts_state) + v0 = tri[:, 0] + v1 = tri[:, 1] + v2 = tri[:, 2] + + face_normal = triangle_face_normal(v0, v1, v2) + + closest = closest_point_on_triangle(probe_pos, v0, v1, v2) + diff = closest - probe_pos + d_sq = diff.dot(diff) + if d_sq <= R_query_sq: + pen_along_normal = 
diff.dot(neg_normal) + if pen_along_normal > 0.0: + if d_sq <= radius_gt_sq and pen_along_normal > max_pen_gt: + max_pen_gt = pen_along_normal + contact_link_gt = link_for_geom + contact_normal_gt = face_normal + if d_sq <= radius_m_sq and pen_along_normal > max_pen_m: + max_pen_m = pen_along_normal + contact_link_m = link_for_geom + contact_normal_m = face_normal + + hit = ray_triangle_intersection(probe_pos, neg_normal, v0, v1, v2, eps) + if hit.w > 0.5: + if hit.x <= probe_radius_gt: + pen_ray_gt = probe_radius_gt - hit.x + if pen_ray_gt > max_pen_gt: + max_pen_gt = pen_ray_gt + contact_link_gt = link_for_geom + contact_normal_gt = face_normal + if hit.x <= probe_radius_m: + pen_ray_m = probe_radius_m - hit.x + if pen_ray_m > max_pen_m: + max_pen_m = pen_ray_m + contact_link_m = link_for_geom + contact_normal_m = face_normal + else: + if stack_idx < qd.static(_BVH_STACK_SIZE - 2): + node_stack[stack_idx] = node.left + node_stack[stack_idx + 1] = node.right + stack_idx += 2 + + return max_pen_gt, contact_link_gt, contact_normal_gt, max_pen_m, contact_link_m, contact_normal_m + + +@qd.kernel(fastcache=False) +def _kernel_contact_depth_probe_bvh( + probe_positions_local: qd.types.ndarray(), + probe_local_normal: qd.types.ndarray(), + probe_sensor_idx: qd.types.ndarray(), + probe_radii: qd.types.ndarray(), + probe_radii_noise: qd.types.ndarray(), + probe_gains: qd.types.ndarray(), + links_idx: qd.types.ndarray(), + sensor_cache_start: qd.types.ndarray(), + sensor_probe_start: qd.types.ndarray(), + sensor_candidate_geom_mask: qd.types.ndarray(), + bvh_nodes: qd.template(), + bvh_morton_codes: qd.template(), + links_state: array_class.LinksState, + faces_info: array_class.FacesInfo, + verts_info: array_class.VertsInfo, + fixed_verts_state: array_class.VertsState, + free_verts_state: array_class.VertsState, + output_gt: qd.types.ndarray(), + output_measured: qd.types.ndarray(), +): + total_n_probes = probe_positions_local.shape[0] + n_batches = output_gt.shape[-1] + + 
for i_p, i_b in qd.ndrange(total_n_probes, n_batches): + i_s = probe_sensor_idx[i_p] + + if probe_radii[i_p] <= gs.qd_float(0.0): + cache_idx = sensor_cache_start[i_s] + i_p - sensor_probe_start[i_s] + output_gt[cache_idx, i_b] = gs.qd_float(0.0) + output_measured[cache_idx, i_b] = gs.qd_float(0.0) + continue + + probe_pos_local = qd.Vector( + [probe_positions_local[i_p, 0], probe_positions_local[i_p, 1], probe_positions_local[i_p, 2]] + ) + probe_normal_local = qd.Vector( + [probe_local_normal[i_p, 0], probe_local_normal[i_p, 1], probe_local_normal[i_p, 2]] + ) + + sensor_link_idx = links_idx[i_s] + link_pos = links_state.pos[sensor_link_idx, i_b] + link_quat = links_state.quat[sensor_link_idx, i_b] + + probe_pos = link_pos + gu.qd_transform_by_quat(probe_pos_local, link_quat) + probe_normal = gu.qd_transform_by_quat(probe_normal_local, link_quat) + + probe_radius = probe_radii[i_p] + probe_radius_noise = probe_radii_noise[i_p] + probe_radius_m = ( + func_noised_probe_radius(probe_radius, probe_radius_noise) if probe_radius_noise > gs.EPS else probe_radius + ) + + max_penetration_gt, max_penetration_m = _func_query_contact_depth_penetration_bvh( + i_b, + i_s, + probe_pos, + probe_normal, + probe_radius, + probe_radius_m, + bvh_nodes, + bvh_morton_codes, + faces_info, + verts_info, + fixed_verts_state, + free_verts_state, + sensor_candidate_geom_mask, + gs.EPS, + ) + max_penetration_m = max_penetration_m * probe_gains[i_b, i_p] + cache_idx = sensor_cache_start[i_s] + i_p - sensor_probe_start[i_s] + output_gt[cache_idx, i_b] = max_penetration_gt + output_measured[cache_idx, i_b] = max_penetration_m + + +@qd.kernel(fastcache=False) +def _kernel_kinematic_taxel_bvh( + probe_positions_local: qd.types.ndarray(), + probe_local_normal: qd.types.ndarray(), + probe_sensor_idx: qd.types.ndarray(), + probe_radii: qd.types.ndarray(), + probe_radii_noise: qd.types.ndarray(), + probe_gains: qd.types.ndarray(), + normal_stiffness: qd.types.ndarray(), + normal_damping: 
qd.types.ndarray(), + normal_exponent: qd.types.ndarray(), + shear_scalar: qd.types.ndarray(), + twist_scalar: qd.types.ndarray(), + links_idx: qd.types.ndarray(), + sensor_cache_start: qd.types.ndarray(), + sensor_probe_start: qd.types.ndarray(), + n_probes_per_sensor: qd.types.ndarray(), + sensor_candidate_geom_mask: qd.types.ndarray(), + bvh_nodes: qd.template(), + bvh_morton_codes: qd.template(), + links_state: array_class.LinksState, + faces_info: array_class.FacesInfo, + verts_info: array_class.VertsInfo, + fixed_verts_state: array_class.VertsState, + free_verts_state: array_class.VertsState, + geoms_info: array_class.GeomsInfo, + measured_equals_gt: int, + output_gt: qd.types.ndarray(), + output_measured: qd.types.ndarray(), +): + total_n_probes = probe_positions_local.shape[0] + n_batches = output_gt.shape[-1] + + for i_p, i_b in qd.ndrange(total_n_probes, n_batches): + i_s = probe_sensor_idx[i_p] + probe_idx_in_sensor = i_p - sensor_probe_start[i_s] + cache_start = sensor_cache_start[i_s] + n_probes = n_probes_per_sensor[i_s] + force_start = cache_start + probe_idx_in_sensor * 3 + torque_start = cache_start + n_probes * 3 + probe_idx_in_sensor * 3 + + if probe_radii[i_p] <= gs.qd_float(0.0): + for j in qd.static(range(3)): + output_gt[force_start + j, i_b] = gs.qd_float(0.0) + output_gt[torque_start + j, i_b] = gs.qd_float(0.0) + output_measured[force_start + j, i_b] = gs.qd_float(0.0) + output_measured[torque_start + j, i_b] = gs.qd_float(0.0) + continue + + probe_pos_local = qd.Vector( + [probe_positions_local[i_p, 0], probe_positions_local[i_p, 1], probe_positions_local[i_p, 2]] + ) + probe_normal_local = qd.Vector( + [probe_local_normal[i_p, 0], probe_local_normal[i_p, 1], probe_local_normal[i_p, 2]] + ) + + sensor_link_idx = links_idx[i_s] + link_pos = links_state.pos[sensor_link_idx, i_b] + link_quat = links_state.quat[sensor_link_idx, i_b] + + probe_pos = link_pos + gu.qd_transform_by_quat(probe_pos_local, link_quat) + probe_normal = 
gu.qd_transform_by_quat(probe_normal_local, link_quat) + + probe_radius = probe_radii[i_p] + probe_radius_noise = probe_radii_noise[i_p] + use_noised_radius = probe_radius_noise > gs.EPS + probe_radius_m = ( + func_noised_probe_radius(probe_radius, probe_radius_noise) if use_noised_radius else probe_radius + ) + + ( + max_penetration_gt, + contact_link_gt, + contact_normal_gt, + max_penetration_m, + contact_link_m, + contact_normal_m, + ) = _func_query_contact_depth_bvh( + i_b, + i_s, + probe_pos, + probe_normal, + probe_radius, + probe_radius_m, + bvh_nodes, + bvh_morton_codes, + faces_info, + verts_info, + fixed_verts_state, + free_verts_state, + geoms_info, + sensor_candidate_geom_mask, + gs.EPS, + ) + + gained_pen_m = max_penetration_m * probe_gains[i_b, i_p] + + force_gt, torque_gt = _func_kinematic_spring_damper( + i_b, + max_penetration_gt, + contact_link_gt, + contact_normal_gt, + sensor_link_idx, + probe_pos, + probe_pos_local, + link_quat, + normal_stiffness[i_s], + normal_damping[i_s], + normal_exponent[i_s], + shear_scalar[i_s], + twist_scalar[i_s], + links_state, + ) + for j in qd.static(range(3)): + output_gt[force_start + j, i_b] = force_gt[j] + output_gt[torque_start + j, i_b] = torque_gt[j] + + if measured_equals_gt == 1: + for j in qd.static(range(3)): + output_measured[force_start + j, i_b] = force_gt[j] + output_measured[torque_start + j, i_b] = torque_gt[j] + else: + force_m, torque_m = _func_kinematic_spring_damper( + i_b, + gained_pen_m, + contact_link_m, + contact_normal_m, + sensor_link_idx, + probe_pos, + probe_pos_local, + link_quat, + normal_stiffness[i_s], + normal_damping[i_s], + normal_exponent[i_s], + shear_scalar[i_s], + twist_scalar[i_s], + links_state, + ) + for j in qd.static(range(3)): + output_measured[force_start + j, i_b] = force_m[j] + output_measured[torque_start + j, i_b] = torque_m[j] + + +def _resolve_query_mode(shared_metadata) -> str: + """Resolve ``shared_metadata.contact_depth_query`` to ``"sdf"`` or ``"raycast"``. 
``None`` defaults to ``"sdf"`` + and is latched in so subsequent calls short-circuit.""" + mode = shared_metadata.contact_depth_query + if mode is None: + mode = "sdf" + shared_metadata.contact_depth_query = mode + return mode + + +def _ensure_candidate_geom_mask(shared_metadata, B: int, n_sensors: int, n_geoms: int) -> None: + """(Re)allocate ``sensor_candidate_geom_mask`` to ``(B, n_sensors, n_geoms)`` if its current shape doesn't match. + Sized lazily because the resolved mode may flip to ``"raycast"`` only after all sensors of this class have built.""" + target = (B, n_sensors, n_geoms) + current = tuple(shared_metadata.sensor_candidate_geom_mask.shape) + if current != target: + shared_metadata.sensor_candidate_geom_mask = torch.zeros(target, dtype=gs.tc_bool, device=gs.device) + + class KinematicTactileSensorMixin(ProbeSensorMixin[ProbesWithNormalSensorSharedMetadataT]): def build(self): super().build() + # SDF activation is the default fallback; raycast mode does not require it but the call is idempotent and + # cheap so we keep it unconditional to avoid needing a build-finalize hook. self._shared_metadata.solver.collider.activate_sdf() + # Last-value-wins propagation of the contact_depth_query option onto the per-sensor-type shared metadata. If + # any sensor of this class opts into "raycast", ensure the per-sim shared collision BVH list exists so it's + # ready by the time _update_current_timestep_data runs. 
+ mode = self._options.contact_depth_query + if mode is not None: + self._shared_metadata.contact_depth_query = mode + if self._shared_metadata.contact_depth_query == "raycast": + from genesis.engine.sensors.raycaster import ensure_solver_bvhs + + self._shared_metadata.collision_bvh = ensure_solver_bvhs(self._manager._sim) + @dataclass class ContactDepthProbeMetadata( ViscoelasticHysteresisMetadataMixin, - ProbeSensorMetadataMixin, + ProbesWithNormalSensorMetadataMixin, ContactPrefilterMetadataMixin, + ContactDepthQueryMetadataMixin, RigidSensorMetadataMixin, SimpleSensorMetadata, ): @@ -484,6 +959,7 @@ class ContactDepthProbeMetadata( class ContactDepthProbeSensor( ViscoelasticHysteresisMixin[ContactDepthProbeMetadata], KinematicTactileSensorMixin[ContactDepthProbeMetadata], + ProbesWithNormalSensorMixin[ContactDepthProbeMetadata], RigidSensorMixin[ContactDepthProbeMetadata], SimpleSensor[ContactDepthProbeOptions, ContactDepthProbeMetadata, tuple], ): @@ -526,25 +1002,61 @@ def _update_current_timestep_data( shared_metadata.sensor_contacts_idx, shared_metadata.sensor_n_contacts, ) - _kernel_contact_depth_probe( - shared_metadata.probe_positions, - shared_metadata.probe_sensor_idx, - shared_metadata.probe_radii, - shared_metadata.probe_radii_noise, - shared_metadata.probe_gains, - shared_metadata.links_idx, - shared_metadata.sensor_cache_start, - shared_metadata.sensor_probe_start, - shared_metadata.sensor_contacts_idx, - shared_metadata.sensor_n_contacts, - solver.collider._collider_state, - solver.links_state, - solver.geoms_state, - solver.geoms_info, - solver.collider._sdf._sdf_info, - current_ground_truth_data_T, - measured_cols_b, - ) + + if _resolve_query_mode(shared_metadata) == "sdf": + _kernel_contact_depth_probe( + shared_metadata.probe_positions, + shared_metadata.probe_sensor_idx, + shared_metadata.probe_radii, + shared_metadata.probe_radii_noise, + shared_metadata.probe_gains, + shared_metadata.links_idx, + shared_metadata.sensor_cache_start, + 
shared_metadata.sensor_probe_start, + shared_metadata.sensor_contacts_idx, + shared_metadata.sensor_n_contacts, + solver.collider._collider_state, + solver.links_state, + solver.geoms_state, + solver.geoms_info, + solver.collider._sdf._sdf_info, + current_ground_truth_data_T, + measured_cols_b, + ) + else: + from genesis.engine.sensors.raycaster import update_solver_bvhs + + B, n_sensors = shared_metadata.sensor_n_contacts.shape + _ensure_candidate_geom_mask(shared_metadata, B, n_sensors, solver.n_geoms) + _kernel_build_sensor_candidate_geom_mask( + shared_metadata.sensor_contacts_idx, + shared_metadata.sensor_n_contacts, + solver.collider._collider_state, + shared_metadata.sensor_candidate_geom_mask, + ) + update_solver_bvhs(shared_metadata.collision_bvh) + rigid_entry = next(e for e in shared_metadata.collision_bvh if e.raycast_mask is None) + _kernel_contact_depth_probe_bvh( + shared_metadata.probe_positions, + shared_metadata.probe_local_normal, + shared_metadata.probe_sensor_idx, + shared_metadata.probe_radii, + shared_metadata.probe_radii_noise, + shared_metadata.probe_gains, + shared_metadata.links_idx, + shared_metadata.sensor_cache_start, + shared_metadata.sensor_probe_start, + shared_metadata.sensor_candidate_geom_mask, + rigid_entry.bvh.nodes, + rigid_entry.bvh.morton_codes, + solver.links_state, + solver.faces_info, + solver.verts_info, + solver.fixed_verts_state, + solver.free_verts_state, + current_ground_truth_data_T, + measured_cols_b, + ) if ground_truth_data_timeline is not None: ground_truth_data_timeline.at(0, copy=False).copy_(current_ground_truth_data_T.T) measured.copy_(measured_cols_b.T) @@ -657,6 +1169,7 @@ class KinematicTaxelMetadata( ViscoelasticHysteresisMetadataMixin, ProbesWithNormalSensorMetadataMixin, ContactPrefilterMetadataMixin, + ContactDepthQueryMetadataMixin, RigidSensorMetadataMixin, SimpleSensorMetadata, ): @@ -930,35 +1443,79 @@ def _update_current_timestep_data( shared_metadata.sensor_contacts_idx, 
shared_metadata.sensor_n_contacts, ) - _kernel_kinematic_taxel( - shared_metadata.probe_positions, - shared_metadata.probe_sensor_idx, - shared_metadata.probe_radii, - shared_metadata.probe_radii_noise, - shared_metadata.probe_gains, - shared_metadata.normal_stiffness, - shared_metadata.normal_damping, - shared_metadata.normal_exponent, - shared_metadata.shear_scalar, - shared_metadata.twist_scalar, - shared_metadata.links_idx, - shared_metadata.sensor_cache_start, - shared_metadata.sensor_probe_start, - shared_metadata.n_probes_per_sensor, - shared_metadata.sensor_contacts_idx, - shared_metadata.sensor_n_contacts, - solver.collider._collider_state, - solver.collider._collider_static_config, - solver.links_state, - solver.geoms_state, - solver.geoms_info, - solver._rigid_global_info, - solver.collider._sdf._sdf_info, - gs.EPS, - measured_equals_gt, - current_ground_truth_data_T, - measured_cols_b, - ) + + if _resolve_query_mode(shared_metadata) == "sdf": + _kernel_kinematic_taxel( + shared_metadata.probe_positions, + shared_metadata.probe_sensor_idx, + shared_metadata.probe_radii, + shared_metadata.probe_radii_noise, + shared_metadata.probe_gains, + shared_metadata.normal_stiffness, + shared_metadata.normal_damping, + shared_metadata.normal_exponent, + shared_metadata.shear_scalar, + shared_metadata.twist_scalar, + shared_metadata.links_idx, + shared_metadata.sensor_cache_start, + shared_metadata.sensor_probe_start, + shared_metadata.n_probes_per_sensor, + shared_metadata.sensor_contacts_idx, + shared_metadata.sensor_n_contacts, + solver.collider._collider_state, + solver.collider._collider_static_config, + solver.links_state, + solver.geoms_state, + solver.geoms_info, + solver._rigid_global_info, + solver.collider._sdf._sdf_info, + gs.EPS, + measured_equals_gt, + current_ground_truth_data_T, + measured_cols_b, + ) + else: + from genesis.engine.sensors.raycaster import update_solver_bvhs + + B, n_sensors = shared_metadata.sensor_n_contacts.shape + 
_ensure_candidate_geom_mask(shared_metadata, B, n_sensors, solver.n_geoms) + _kernel_build_sensor_candidate_geom_mask( + shared_metadata.sensor_contacts_idx, + shared_metadata.sensor_n_contacts, + solver.collider._collider_state, + shared_metadata.sensor_candidate_geom_mask, + ) + update_solver_bvhs(shared_metadata.collision_bvh) + rigid_entry = next(e for e in shared_metadata.collision_bvh if e.raycast_mask is None) + _kernel_kinematic_taxel_bvh( + shared_metadata.probe_positions, + shared_metadata.probe_local_normal, + shared_metadata.probe_sensor_idx, + shared_metadata.probe_radii, + shared_metadata.probe_radii_noise, + shared_metadata.probe_gains, + shared_metadata.normal_stiffness, + shared_metadata.normal_damping, + shared_metadata.normal_exponent, + shared_metadata.shear_scalar, + shared_metadata.twist_scalar, + shared_metadata.links_idx, + shared_metadata.sensor_cache_start, + shared_metadata.sensor_probe_start, + shared_metadata.n_probes_per_sensor, + shared_metadata.sensor_candidate_geom_mask, + rigid_entry.bvh.nodes, + rigid_entry.bvh.morton_codes, + solver.links_state, + solver.faces_info, + solver.verts_info, + solver.fixed_verts_state, + solver.free_verts_state, + solver.geoms_info, + measured_equals_gt, + current_ground_truth_data_T, + measured_cols_b, + ) if ground_truth_data_timeline is not None: ground_truth_data_timeline.at(0, copy=False).copy_(current_ground_truth_data_T.T) measured.copy_(measured_cols_b.T) diff --git a/genesis/engine/sensors/point_cloud_tactile.py b/genesis/engine/sensors/point_cloud_tactile.py index d5d6d34c9a..455a533cd3 100644 --- a/genesis/engine/sensors/point_cloud_tactile.py +++ b/genesis/engine/sensors/point_cloud_tactile.py @@ -9,10 +9,16 @@ import genesis.utils.array_class as array_class import genesis.utils.geom as gu import genesis.utils.sdf as sdf +from genesis.engine.bvh import STACK_SIZE as _BVH_STACK_SIZE from genesis.options.sensors import ElastomerTaxel as ElastomerTaxelSensorOptions from 
genesis.options.sensors import ProximityTaxel as ProximityTaxelOptions from genesis.utils.misc import concat_with_tensor, make_tensor_field, tensor_to_array from genesis.utils.point_cloud import sample_mesh_point_cloud +from genesis.utils.raycast_qd import ( + closest_point_on_triangle, + get_triangle_vertices, + triangle_face_normal, +) from .base_sensor import RigidSensorMetadataMixin, RigidSensorMixin, SimpleSensor, SimpleSensorMetadata from .probe import ( @@ -27,6 +33,7 @@ BVH_LEAF_SIZE, BVH_STACK_SIZE, BVHMetadata, + ContactDepthQueryMetadataMixin, GridFFTConvMetadataMixin, ViscoelasticHysteresisMetadataMixin, ViscoelasticHysteresisMixin, @@ -103,12 +110,36 @@ def _active_envs_mask_tensor(geom, batch_size: int) -> torch.Tensor: return geom.active_envs_mask.to(device=gs.device, dtype=gs.tc_bool) +def _group_geoms_by_variant( + geom_chunks: list[tuple[object, np.ndarray, np.ndarray]], batch_size: int +) -> list[tuple[torch.Tensor, list[tuple[object, np.ndarray, np.ndarray]]]]: + """ + Partition a link's geoms into heterogeneous-variant groups by ``active_envs_mask``. Geoms sharing + a mask are one variant; ``None`` masks (homogeneous) collapse into a single all-True group. + Returns ``[(mask, geom_chunks_for_variant), ...]`` preserving the original geom order within each group. + """ + groups: dict[bytes, tuple[torch.Tensor, list[tuple[object, np.ndarray, np.ndarray]]]] = {} + for chunk in geom_chunks: + geom = chunk[0] + mask = _active_envs_mask_tensor(geom, batch_size) + key = tensor_to_array(mask).astype(np.bool_).tobytes() + if key not in groups: + groups[key] = (mask, []) + groups[key][1].append(chunk) + return list(groups.values()) + + def _sample_track_links_point_cloud_tensors( solver, track_link_idx: np.ndarray, n_sample_points: int | list | tuple, prefer_visual: bool ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: """ FPS-sample meshes on ``track_link_idx`` into concatenated link-local positions and normals. 
+ The per-link budget from ``n_sample_points`` is allocated to every heterogeneous variant on a link + (geoms grouped by ``active_envs_mask``), so each parallel environment sees the full requested point + count regardless of which variant is active. Within a variant, the budget is split across geoms by + surface area. + Returns ------- idx_cat, pos_cat, nrm_cat, active_cat @@ -130,20 +161,22 @@ def _sample_track_links_point_cloud_tensors( geom_chunks = get_mesh_geom_chunks(link, prefer_visual) if not geom_chunks: gs.raise_exception(f"No mesh geometry on tracked link index {link_idx}.") - for n_geom_pts, (geom, verts, faces) in zip(_split_count_by_area(n_pts, geom_chunks), geom_chunks): - if n_geom_pts <= 0: - continue - # Fixed seed: the cache key already discriminates between meshes (vertices+faces hashed), so the same mesh - # always resolves to the same sample, which keeps tactile readings reproducible across build/reset cycles. - pts_np, nrm_np = sample_mesh_point_cloud( - verts, faces, n_geom_pts, seed=0, use_cache=True, return_normals=True - ) + for variant_mask, variant_chunks in _group_geoms_by_variant(geom_chunks, solver._B): + for n_geom_pts, (geom, verts, faces) in zip(_split_count_by_area(n_pts, variant_chunks), variant_chunks): + if n_geom_pts <= 0: + continue + # Fixed seed: the cache key already discriminates between meshes (vertices+faces hashed), so the same + # mesh always resolves to the same sample, which keeps tactile readings reproducible across + # build/reset cycles. 
+ pts_np, nrm_np = sample_mesh_point_cloud( + verts, faces, n_geom_pts, seed=0, use_cache=True, return_normals=True + ) - li = torch.full((pts_np.shape[0],), link_idx, dtype=gs.tc_int, device=gs.device) - link_idx_chunks.append(li) - pos_chunks.append(torch.tensor(pts_np, dtype=gs.tc_float, device=gs.device)) - nrm_chunks.append(torch.tensor(nrm_np, dtype=gs.tc_float, device=gs.device)) - active_chunks.append(_active_envs_mask_tensor(geom, solver._B).expand(pts_np.shape[0], solver._B)) + li = torch.full((pts_np.shape[0],), link_idx, dtype=gs.tc_int, device=gs.device) + link_idx_chunks.append(li) + pos_chunks.append(torch.tensor(pts_np, dtype=gs.tc_float, device=gs.device)) + nrm_chunks.append(torch.tensor(nrm_np, dtype=gs.tc_float, device=gs.device)) + active_chunks.append(variant_mask.expand(pts_np.shape[0], solver._B)) if not pos_chunks: gs.raise_exception("PointCloudTactile sensor produced an empty object point cloud.") @@ -866,6 +899,129 @@ def _dilate_kernel_builder(meta_entry: tuple, fft_n: tuple[int, int]) -> torch.T return _precompute_hydroshear_dilate_kernel_fft(lambda_d, (spacing_u, spacing_v), fft_n, gs.device, gs.tc_float) +@qd.func +def _func_elastomer_min_signed_dist_bvh( + i_b: int, + i_s: int, + probe_world: qd.types.vector(3), + max_query_dist: float, + bvh_nodes: qd.template(), + bvh_morton_codes: qd.template(), + faces_info: array_class.FacesInfo, + verts_info: array_class.VertsInfo, + fixed_verts_state: array_class.VertsState, + free_verts_state: array_class.VertsState, + track_geom_mask: qd.types.ndarray(), +) -> float: + """ + BVH-based signed distance from ``probe_world`` to the nearest triangle of any geom flagged for this sensor in + ``track_geom_mask`` (shape ``(B, n_sensors, n_geoms)``). Sign is positive when the probe is outside the surface + (``probe - closest`` has a non-negative dot product with the closest triangle's face normal), negative when inside.
Mirrors the return contract of + ``_func_elastomer_min_sdf_over_active_geoms`` so callers consume ``max(0, -signed)`` identically. + + Uses ``max_query_dist`` as the BVH cull radius: probes farther than that from every candidate triangle are + treated as fully outside (returns ``+max_query_dist``), which downstream maps to depth = 0. + """ + n_triangles = faces_info.verts_idx.shape[0] + best_dist = max_query_dist + best_dist_sq = best_dist * best_dist + best_signed = max_query_dist + + node_stack = qd.Vector.zero(gs.qd_int, qd.static(_BVH_STACK_SIZE)) + node_stack[0] = 0 + stack_idx = 1 + + while stack_idx > 0: + stack_idx -= 1 + node_idx = node_stack[stack_idx] + node = bvh_nodes[i_b, node_idx] + + if not func_sphere_intersects_aabb(probe_world, best_dist_sq, node.bound.min, node.bound.max): + continue + + if node.left == -1: + sorted_leaf_idx = node_idx - (n_triangles - 1) + i_f = qd.cast(bvh_morton_codes[i_b, sorted_leaf_idx][1], gs.qd_int) + i_g = faces_info.geom_idx[i_f] + if not track_geom_mask[i_b, i_s, i_g]: + continue + + tri = get_triangle_vertices(i_f, i_b, faces_info, verts_info, fixed_verts_state, free_verts_state) + v0 = tri[:, 0] + v1 = tri[:, 1] + v2 = tri[:, 2] + closest = closest_point_on_triangle(probe_world, v0, v1, v2) + diff = probe_world - closest + d_sq = diff.dot(diff) + if d_sq < best_dist_sq: + d = qd.sqrt(d_sq) + fn = triangle_face_normal(v0, v1, v2) + # Sign: probe outside if (probe - closest) aligns with outward face normal. 
+ sign_v = qd.select(diff.dot(fn) >= gs.qd_float(0.0), gs.qd_float(1.0), gs.qd_float(-1.0)) + best_signed = d * sign_v + best_dist = d + best_dist_sq = d_sq + else: + if stack_idx < qd.static(_BVH_STACK_SIZE - 2): + node_stack[stack_idx] = node.left + node_stack[stack_idx + 1] = node.right + stack_idx += 2 + + return best_signed + + +@qd.kernel(fastcache=False) +def _kernel_elastomer_probe_depth_bvh( + probe_positions_local: qd.types.ndarray(), + probe_sensor_idx: qd.types.ndarray(), + probe_radii: qd.types.ndarray(), + links_idx: qd.types.ndarray(), + track_geom_mask: qd.types.ndarray(), + max_query_dist: float, + bvh_nodes: qd.template(), + bvh_morton_codes: qd.template(), + links_state: array_class.LinksState, + faces_info: array_class.FacesInfo, + verts_info: array_class.VertsInfo, + fixed_verts_state: array_class.VertsState, + free_verts_state: array_class.VertsState, + probe_depth_buf: qd.types.ndarray(), +): + """ + Per-probe contact depth from the rigid solver's global collision BVH, gated by ``track_geom_mask``. Mirrors + ``_kernel_elastomer_probe_depth``'s output contract (write into ``probe_depth_buf``); the dilate accumulator + consumes the same buffer downstream. 
+ """ + total_n_probes = probe_positions_local.shape[0] + n_batches = probe_depth_buf.shape[0] + + for i_b, i_p in qd.ndrange(n_batches, total_n_probes): + if probe_radii[i_p] <= gs.qd_float(0.0): + probe_depth_buf[i_b, i_p] = gs.qd_float(0.0) + continue + i_s = probe_sensor_idx[i_p] + sensor_link_idx = links_idx[i_s] + link_pos = links_state.pos[sensor_link_idx, i_b] + link_quat = links_state.quat[sensor_link_idx, i_b] + probe_local = func_vec3_at(probe_positions_local, i_p) + probe_world = link_pos + gu.qd_transform_by_quat(probe_local, link_quat) + + signed = _func_elastomer_min_signed_dist_bvh( + i_b, + i_s, + probe_world, + max_query_dist, + bvh_nodes, + bvh_morton_codes, + faces_info, + verts_info, + fixed_verts_state, + free_verts_state, + track_geom_mask, + ) + probe_depth_buf[i_b, i_p] = qd.max(gs.qd_float(0.0), -signed) + + @qd.kernel(fastcache=True) def _kernel_elastomer_probe_depth( probe_positions_local: qd.types.ndarray(), @@ -1142,6 +1298,166 @@ def _kernel_elastomer_surface_state_bvh( stack_idx += 1 +@qd.kernel(fastcache=False) +def _kernel_elastomer_surface_state_via_global_bvh( + links_idx: qd.types.ndarray(), + sensor_elastomer_geom_start: qd.types.ndarray(), + sensor_elastomer_geom_n: qd.types.ndarray(), + elastomer_geom_idx: qd.types.ndarray(), + elastomer_geom_active_envs_mask: qd.types.ndarray(), + elastomer_candidate_geom_mask: qd.types.ndarray(), + bvh_chunk_sensor_idx: qd.types.ndarray(), + bvh_chunk_link_idx: qd.types.ndarray(), + bvh_chunk_node_start: qd.types.ndarray(), + bvh_node_min: qd.types.ndarray(), + bvh_node_max: qd.types.ndarray(), + bvh_node_left: qd.types.ndarray(), + bvh_node_right: qd.types.ndarray(), + bvh_node_leaf_start: qd.types.ndarray(), + bvh_node_leaf_count: qd.types.ndarray(), + bvh_leaf_elem_idx: qd.types.ndarray(), + pc_pos_link: qd.types.ndarray(), + pc_active_envs_mask: qd.types.ndarray(), + sdf_enter: qd.types.ndarray(), + sdf_exit: qd.types.ndarray(), + aabb_margin: float, + max_query_dist: float, + 
global_bvh_nodes: qd.template(), + global_bvh_morton_codes: qd.template(), + links_state: array_class.LinksState, + geoms_state: array_class.GeomsState, + faces_info: array_class.FacesInfo, + verts_info: array_class.VertsInfo, + fixed_verts_state: array_class.VertsState, + free_verts_state: array_class.VertsState, + surface_pos_sensor_buf: qd.types.ndarray(), + surface_entry_pos_sensor_buf: qd.types.ndarray(), + surface_depth_buf: qd.types.ndarray(), + surface_initialized_buf: qd.types.ndarray(), + surface_candidate_buf: qd.types.ndarray(), +): + """ + Raycast variant of ``_kernel_elastomer_surface_state_bvh``: same outer (env, chunk) traversal over the point-cloud + BVH per tracked link, but the inner signed-distance query at each PC point uses ``_func_elastomer_min_signed_dist_bvh`` + over the rigid solver's global collision BVH (gated by ``elastomer_candidate_geom_mask``) instead of the analytic + SDF. Output contract matches the SDF variant so the dilate / shear pipeline downstream is unchanged. 
+ """ + n_batches = surface_pos_sensor_buf.shape[0] + n_chunks = bvh_chunk_sensor_idx.shape[0] + + for i_b, i_c in qd.ndrange(n_batches, n_chunks): + i_s = bvh_chunk_sensor_idx[i_c] + + wmin = qd.Vector([gs.qd_float(1e30), gs.qd_float(1e30), gs.qd_float(1e30)], dt=gs.qd_float) + wmax = qd.Vector([gs.qd_float(-1e30), gs.qd_float(-1e30), gs.qd_float(-1e30)], dt=gs.qd_float) + any_active = False + gm_start = sensor_elastomer_geom_start[i_s] + gm_n = sensor_elastomer_geom_n[i_s] + for i_gm in range(gm_start, gm_start + gm_n): + if not elastomer_geom_active_envs_mask[i_gm, i_b]: + continue + i_g = elastomer_geom_idx[i_gm] + gmin = geoms_state.aabb_min[i_g, i_b] + gmax = geoms_state.aabb_max[i_g, i_b] + for k in qd.static(range(3)): + if gmin[k] < wmin[k]: + wmin[k] = gmin[k] + if gmax[k] > wmax[k]: + wmax[k] = gmax[k] + any_active = True + + if not any_active: + continue + + expand = sdf_exit[i_s] + gs.qd_float(aabb_margin) + for k in qd.static(range(3)): + wmin[k] = wmin[k] - expand + wmax[k] = wmax[k] + expand + + track_link_idx = bvh_chunk_link_idx[i_c] + track_pos = links_state.pos[track_link_idx, i_b] + track_quat = links_state.quat[track_link_idx, i_b] + qmin = qd.Vector([gs.qd_float(1e30), gs.qd_float(1e30), gs.qd_float(1e30)], dt=gs.qd_float) + qmax = qd.Vector([gs.qd_float(-1e30), gs.qd_float(-1e30), gs.qd_float(-1e30)], dt=gs.qd_float) + for cx in qd.static(range(2)): + for cy in qd.static(range(2)): + for cz in qd.static(range(2)): + cw_x = wmax[0] if cx == 1 else wmin[0] + cw_y = wmax[1] if cy == 1 else wmin[1] + cw_z = wmax[2] if cz == 1 else wmin[2] + corner_world = qd.Vector([cw_x, cw_y, cw_z], dt=gs.qd_float) + corner_link = gu.qd_inv_transform_by_trans_quat(corner_world, track_pos, track_quat) + for k in qd.static(range(3)): + if corner_link[k] < qmin[k]: + qmin[k] = corner_link[k] + if corner_link[k] > qmax[k]: + qmax[k] = corner_link[k] + + sensor_link_idx = links_idx[i_s] + sensor_pos = links_state.pos[sensor_link_idx, i_b] + sensor_quat = 
links_state.quat[sensor_link_idx, i_b] + + stack = qd.Vector.zero(gs.qd_int, qd.static(BVH_STACK_SIZE)) + stack[0] = bvh_chunk_node_start[i_c] + stack_idx = 1 + + while stack_idx > 0: + stack_idx -= 1 + n = stack[stack_idx] + bmin = func_vec3_at(bvh_node_min, n) + bmax = func_vec3_at(bvh_node_max, n) + if not func_aabb_intersects_aabb(bmin, bmax, qmin, qmax): + continue + left = bvh_node_left[n] + if left == -1: + pstart = bvh_node_leaf_start[n] + pn = bvh_node_leaf_count[n] + for j in range(pn): + i_o = bvh_leaf_elem_idx[pstart + j] + if not pc_active_envs_mask[i_o, i_b]: + continue + surface_candidate_buf[i_b, i_o] = True + + point_link = func_vec3_at(pc_pos_link, i_o) + point_world = track_pos + gu.qd_transform_by_quat(point_link, track_quat) + point_sensor = gu.qd_inv_transform_by_trans_quat(point_world, sensor_pos, sensor_quat) + for k in qd.static(range(3)): + surface_pos_sensor_buf[i_b, i_o, k] = point_sensor[k] + + min_sdf = _func_elastomer_min_signed_dist_bvh( + i_b, + i_s, + point_world, + max_query_dist, + global_bvh_nodes, + global_bvh_morton_codes, + faces_info, + verts_info, + fixed_verts_state, + free_verts_state, + elastomer_candidate_geom_mask, + ) + + surface_depth_buf[i_b, i_o] = qd.max(gs.qd_float(0.0), -min_sdf) + + _func_elastomer_update_surface_anchor( + i_b, + i_o, + min_sdf, + point_sensor, + sdf_enter[i_s], + sdf_exit[i_s], + surface_entry_pos_sensor_buf, + surface_initialized_buf, + ) + else: + right = bvh_node_right[n] + stack[stack_idx] = left + stack_idx += 1 + stack[stack_idx] = right + stack_idx += 1 + + @qd.kernel(fastcache=True) def _kernel_elastomer_shear_accumulate( probe_positions_local: qd.types.ndarray(), @@ -1356,6 +1672,7 @@ def _elastomer_taxel_grid_fft_dilate( class ElastomerTaxelSensorMetadata( ViscoelasticHysteresisMetadataMixin, GridFFTConvMetadataMixin, + ContactDepthQueryMetadataMixin, PointCloudTactileSharedMetadata, ProbesWithNormalSensorMetadataMixin, ): @@ -1369,6 +1686,11 @@ class ElastomerTaxelSensorMetadata( 
sensor_elastomer_geom_start: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) sensor_elastomer_geom_n: torch.Tensor = make_tensor_field((0,), dtype_factory=lambda: gs.tc_int) + # Per-(B, sensor, geom) bitmask of elastomer (sensor-own) geoms, used by the global-BVH surface-state kernel + # to gate triangles back to the sensor's elastomer surface. Separate from ``sensor_candidate_geom_mask`` which + # gates by tracked-object geoms for the probe-depth kernel. + elastomer_candidate_geom_mask: torch.Tensor = make_tensor_field((0, 0, 0), dtype_factory=lambda: gs.tc_bool) + lambda_d: torch.Tensor = make_tensor_field((0,)) lambda_s: torch.Tensor = make_tensor_field((0,)) dilate_scale: torch.Tensor = make_tensor_field((0,)) @@ -1448,11 +1770,23 @@ def build(self): super().build() solver = self._shared_metadata.solver - solver.collider.activate_sdf() B = self._manager._sim._B if self._link is None: gs.raise_exception("ElastomerTaxel must be attached to a rigid link with collision geometry.") + # Last-value-wins propagation of contact_depth_query. SDF activation is skipped only when this sensor's own + # option is explicitly "raycast" -- with last-value-wins semantics this can leave SDF activated in mixed-mode + # configurations, but never broken (an unused activated SDF is harmless). 
+ mode = self._options.contact_depth_query + if mode is not None: + self._shared_metadata.contact_depth_query = mode + if mode != "raycast": + solver.collider.activate_sdf() + if self._shared_metadata.contact_depth_query == "raycast": + from genesis.engine.sensors.raycaster import ensure_solver_bvhs + + self._shared_metadata.collision_bvh = ensure_solver_bvhs(self._manager._sim) + elastomer_geom_start_row = self._shared_metadata.elastomer_geom_idx.shape[0] elastomer_geom_idx, elastomer_geom_active_envs_mask = _collect_collision_geom_idx( solver, np.asarray((self._link.idx,), dtype=gs.np_int) @@ -1547,6 +1881,35 @@ def build(self): (B, n_sensors_built), dtype=gs.tc_int, device=gs.device ) + # Build the (B, n_sensors, n_geoms) candidate-geom masks scattered from track_geom_idx (probe-depth) and + # elastomer_geom_idx (surface-anchor). They are only needed in raycast mode, so they are built only when + # the resolved mode is "raycast"; in sdf mode both mask fields keep their empty defaults. + if self._shared_metadata.contact_depth_query == "raycast": + n_geoms = solver.n_geoms + + def _build_mask(geom_starts_tensor, geom_ns_tensor, geom_idx_tensor): + mask = torch.zeros((B, n_sensors_built, n_geoms), dtype=gs.tc_bool, device=gs.device) + geom_starts = geom_starts_tensor.cpu().numpy() + geom_ns = geom_ns_tensor.cpu().numpy() + idx_np = geom_idx_tensor.cpu().numpy() + for s in range(n_sensors_built): + lo = int(geom_starts[s]) + hi = lo + int(geom_ns[s]) + if hi > lo: + mask[:, s, idx_np[lo:hi]] = True + return mask + + self._shared_metadata.sensor_candidate_geom_mask = _build_mask( + self._shared_metadata.sensor_track_geom_start, + self._shared_metadata.sensor_track_geom_n, + self._shared_metadata.track_geom_idx, + ) + self._shared_metadata.elastomer_candidate_geom_mask = _build_mask( + self._shared_metadata.sensor_elastomer_geom_start, + self._shared_metadata.sensor_elastomer_geom_n, + self._shared_metadata.elastomer_geom_idx, + ) + self._shared_metadata.use_grid_fft = concat_with_tensor(
self._shared_metadata.use_grid_fft, self._use_grid_fft, expand=(1,) ) @@ -1652,21 +2015,47 @@ def _update_current_timestep_data( # which is set in lockstep with that same depth write; measured is .copy_'d at the end. measured = measured_data_timeline.at(0, copy=False) - _kernel_elastomer_probe_depth( - shared_metadata.probe_positions, - shared_metadata.probe_sensor_idx, - shared_metadata.probe_radii, - shared_metadata.links_idx, - shared_metadata.sensor_track_geom_start, - shared_metadata.sensor_track_geom_n, - shared_metadata.track_geom_idx, - shared_metadata.track_geom_active_envs_mask, - solver.links_state, - solver.geoms_state, - solver.geoms_info, - solver.collider._sdf._sdf_info, - shared_metadata.probe_depth_buf, - ) + if (shared_metadata.contact_depth_query or "sdf") == "sdf": + _kernel_elastomer_probe_depth( + shared_metadata.probe_positions, + shared_metadata.probe_sensor_idx, + shared_metadata.probe_radii, + shared_metadata.links_idx, + shared_metadata.sensor_track_geom_start, + shared_metadata.sensor_track_geom_n, + shared_metadata.track_geom_idx, + shared_metadata.track_geom_active_envs_mask, + solver.links_state, + solver.geoms_state, + solver.geoms_info, + solver.collider._sdf._sdf_info, + shared_metadata.probe_depth_buf, + ) + else: + from genesis.engine.sensors.raycaster import update_solver_bvhs + + shared_metadata.contact_depth_query = "raycast" # latch + update_solver_bvhs(shared_metadata.collision_bvh) + rigid_entry = next(e for e in shared_metadata.collision_bvh if e.raycast_mask is None) + # Conservative BVH walk cap. Far enough to cover any realistic elastomer penetration; probes outside + # this radius from every candidate triangle map to depth = 0. 
+ max_query_dist = float(0.1) + _kernel_elastomer_probe_depth_bvh( + shared_metadata.probe_positions, + shared_metadata.probe_sensor_idx, + shared_metadata.probe_radii, + shared_metadata.links_idx, + shared_metadata.sensor_candidate_geom_mask, + max_query_dist, + rigid_entry.bvh.nodes, + rigid_entry.bvh.morton_codes, + solver.links_state, + solver.faces_info, + solver.verts_info, + solver.fixed_verts_state, + solver.free_verts_state, + shared_metadata.probe_depth_buf, + ) _kernel_elastomer_dilate_accumulate( shared_metadata.use_grid_fft, shared_metadata.probe_positions, @@ -1701,37 +2090,77 @@ def _update_current_timestep_data( if shared_metadata.any_shear: bvh = shared_metadata.pc_bvh shared_metadata.surface_candidate_buf.zero_() - _kernel_elastomer_surface_state_bvh( - shared_metadata.links_idx, - shared_metadata.sensor_elastomer_geom_start, - shared_metadata.sensor_elastomer_geom_n, - shared_metadata.elastomer_geom_idx, - shared_metadata.elastomer_geom_active_envs_mask, - bvh.chunk_sensor_idx, - bvh.chunk_link_idx, - bvh.chunk_node_start, - bvh.node_min, - bvh.node_max, - bvh.node_left, - bvh.node_right, - bvh.node_leaf_start, - bvh.node_leaf_count, - bvh.leaf_elem_idx, - shared_metadata.pc_pos_link, - shared_metadata.pc_active_envs_mask, - shared_metadata.elastomer_contact_sdf_enter, - shared_metadata.elastomer_contact_sdf_exit, - _ELASTOMER_QUERY_AABB_MARGIN, - solver.links_state, - solver.geoms_state, - solver.geoms_info, - solver.collider._sdf._sdf_info, - shared_metadata.surface_pos_sensor_buf, - shared_metadata.surface_entry_pos_sensor_buf, - shared_metadata.surface_depth_buf, - shared_metadata.surface_initialized_buf, - shared_metadata.surface_candidate_buf, - ) + if (shared_metadata.contact_depth_query or "sdf") == "sdf": + _kernel_elastomer_surface_state_bvh( + shared_metadata.links_idx, + shared_metadata.sensor_elastomer_geom_start, + shared_metadata.sensor_elastomer_geom_n, + shared_metadata.elastomer_geom_idx, + 
shared_metadata.elastomer_geom_active_envs_mask, + bvh.chunk_sensor_idx, + bvh.chunk_link_idx, + bvh.chunk_node_start, + bvh.node_min, + bvh.node_max, + bvh.node_left, + bvh.node_right, + bvh.node_leaf_start, + bvh.node_leaf_count, + bvh.leaf_elem_idx, + shared_metadata.pc_pos_link, + shared_metadata.pc_active_envs_mask, + shared_metadata.elastomer_contact_sdf_enter, + shared_metadata.elastomer_contact_sdf_exit, + _ELASTOMER_QUERY_AABB_MARGIN, + solver.links_state, + solver.geoms_state, + solver.geoms_info, + solver.collider._sdf._sdf_info, + shared_metadata.surface_pos_sensor_buf, + shared_metadata.surface_entry_pos_sensor_buf, + shared_metadata.surface_depth_buf, + shared_metadata.surface_initialized_buf, + shared_metadata.surface_candidate_buf, + ) + else: + rigid_entry = next(e for e in shared_metadata.collision_bvh if e.raycast_mask is None) + _kernel_elastomer_surface_state_via_global_bvh( + shared_metadata.links_idx, + shared_metadata.sensor_elastomer_geom_start, + shared_metadata.sensor_elastomer_geom_n, + shared_metadata.elastomer_geom_idx, + shared_metadata.elastomer_geom_active_envs_mask, + shared_metadata.elastomer_candidate_geom_mask, + bvh.chunk_sensor_idx, + bvh.chunk_link_idx, + bvh.chunk_node_start, + bvh.node_min, + bvh.node_max, + bvh.node_left, + bvh.node_right, + bvh.node_leaf_start, + bvh.node_leaf_count, + bvh.leaf_elem_idx, + shared_metadata.pc_pos_link, + shared_metadata.pc_active_envs_mask, + shared_metadata.elastomer_contact_sdf_enter, + shared_metadata.elastomer_contact_sdf_exit, + _ELASTOMER_QUERY_AABB_MARGIN, + float(0.1), # max_query_dist for the global-BVH closest-point search + rigid_entry.bvh.nodes, + rigid_entry.bvh.morton_codes, + solver.links_state, + solver.geoms_state, + solver.faces_info, + solver.verts_info, + solver.fixed_verts_state, + solver.free_verts_state, + shared_metadata.surface_pos_sensor_buf, + shared_metadata.surface_entry_pos_sensor_buf, + shared_metadata.surface_depth_buf, + 
shared_metadata.surface_initialized_buf, + shared_metadata.surface_candidate_buf, + ) # Invalidate stale surface state for points the BVH did not visit. surface_initialized # and entry-pos survive across steps; depth/pos are gated by initialized downstream so # they don't need clearing. The shear accumulator below reads from a compact index diff --git a/genesis/engine/sensors/raycaster.py b/genesis/engine/sensors/raycaster.py index c0834147af..623c761beb 100644 --- a/genesis/engine/sensors/raycaster.py +++ b/genesis/engine/sensors/raycaster.py @@ -44,6 +44,76 @@ class _SolverBVH(NamedTuple): raycast_mask: np.ndarray | None +# Per-process cache of collision/visual BVH lists, keyed by ``id(sim)`` so the raycaster and any tactile-probe +# sensors in ``contact_depth_query="raycast"`` mode share one BVH object per simulator without mutating sim state. +_SIM_BVHS: dict[int, list[_SolverBVH]] = {} + + +def ensure_solver_bvhs(sim) -> list[_SolverBVH]: + """ + Idempotently build the per-(solver, mesh-type) collision/visual BVHs for ``sim`` and return the shared list. + + Both ``RaycasterSensor`` and tactile-probe sensors in ``contact_depth_query="raycast"`` mode reuse the same list + so they share the BVH state for a given sim. 
+ """ + sim_key = id(sim) + existing = _SIM_BVHS.get(sim_key) + if existing is not None: + return existing + + solver_bvhs: list[_SolverBVH] = [] + for solver in (sim.rigid_solver, sim.kinematic_solver): + if not solver.is_active: + continue + n_envs = solver._B + if isinstance(solver, RigidSolver): + n_faces = solver.faces_info.geom_idx.shape[0] + aabb = AABB(n_batches=n_envs, n_aabbs=n_faces) + bvh = LBVH(aabb, max_n_query_result_per_aabb=0, n_radix_sort_groups=64) + solver_bvhs.append(_SolverBVH(solver, bvh, aabb, None)) + n_vfaces = solver.vfaces_info.vgeom_idx.shape[0] + if n_vfaces > 0: + mask = RaycasterSensor._compute_visual_raycast_mask(solver) + if mask.any(): + aabb = AABB(n_batches=n_envs, n_aabbs=n_vfaces) + bvh = LBVH(aabb, max_n_query_result_per_aabb=0, n_radix_sort_groups=64) + solver_bvhs.append(_SolverBVH(solver, bvh, aabb, mask)) + + _SIM_BVHS[sim_key] = solver_bvhs + return solver_bvhs + + +def update_solver_bvhs(solver_bvhs: list[_SolverBVH]) -> None: + """Rebuild ``solver_bvhs`` from current scene state. Cheap dedup is intentionally NOT applied here: callers can + invoke this between scene mutations (e.g. 
``set_pos``) without an intervening simulator substep, so any cross-call + deduplication would mask real geometry changes.""" + for entry in solver_bvhs: + if entry.raycast_mask is None: + kernel_update_verts_and_aabbs( + geoms_info=entry.solver.geoms_info, + geoms_state=entry.solver.geoms_state, + verts_info=entry.solver.verts_info, + faces_info=entry.solver.faces_info, + free_verts_state=entry.solver.free_verts_state, + fixed_verts_state=entry.solver.fixed_verts_state, + static_rigid_sim_config=entry.solver._static_rigid_sim_config, + aabb_state=entry.aabb, + ) + entry.bvh.build() + else: + entry.solver.update_forward_pos() + entry.solver.update_vgeoms() + kernel_update_visual_aabbs( + vverts_info=entry.solver.vverts_info, + vverts_state=entry.solver.vverts_state, + vfaces_info=entry.solver.vfaces_info, + vgeoms_state=entry.solver.vgeoms_state, + face_mask=entry.raycast_mask, + aabb_state=entry.aabb, + ) + entry.bvh.build() + + @dataclass class RaycasterSharedMetadata(KinematicSensorMetadataMixin, SimpleSensorMetadata): # All BVHs (one per active solver per mesh type) cast against each frame. The first is written into the output cache @@ -106,35 +176,7 @@ def _compute_visual_raycast_mask(solver: "KinematicSolver") -> np.ndarray: @classmethod def _update_bvh(cls, shared_metadata: RaycasterSharedMetadata): """Rebuild every BVH from current geometry in the scene.""" - for entry in shared_metadata.solver_bvhs: - if entry.raycast_mask is None: - kernel_update_verts_and_aabbs( - geoms_info=entry.solver.geoms_info, - geoms_state=entry.solver.geoms_state, - verts_info=entry.solver.verts_info, - faces_info=entry.solver.faces_info, - free_verts_state=entry.solver.free_verts_state, - fixed_verts_state=entry.solver.fixed_verts_state, - static_rigid_sim_config=entry.solver._static_rigid_sim_config, - aabb_state=entry.aabb, - ) - entry.bvh.build() - else: - # Reads vverts_state.pos as the source of vvert positions. 
The buffer is seeded by FK at scene.build() - # and refreshed for each user-driven entity via set_vverts; entries set via set_vverts survive across - # calls until set_vverts(None) re-runs FK over the entity's vgeoms. raycast_mask gates which vfaces - # contribute to the BVH; masked-out vfaces keep an inverted AABB and are skipped by ray queries. - entry.solver.update_forward_pos() - entry.solver.update_vgeoms() - kernel_update_visual_aabbs( - vverts_info=entry.solver.vverts_info, - vverts_state=entry.solver.vverts_state, - vfaces_info=entry.solver.vfaces_info, - vgeoms_state=entry.solver.vgeoms_state, - face_mask=entry.raycast_mask, - aabb_state=entry.aabb, - ) - entry.bvh.build() + update_solver_bvhs(shared_metadata.solver_bvhs) def build(self): super().build() @@ -148,23 +190,9 @@ def build(self): self._shared_metadata.sensor_cache_offsets, 0 ) - sim = self._manager._sim - for solver in (sim.rigid_solver, sim.kinematic_solver): - if not solver.is_active: - continue - n_envs = solver._B - if isinstance(solver, RigidSolver): - n_faces = solver.faces_info.geom_idx.shape[0] - aabb = AABB(n_batches=n_envs, n_aabbs=n_faces) - bvh = LBVH(aabb, max_n_query_result_per_aabb=0, n_radix_sort_groups=64) - self._shared_metadata.solver_bvhs.append(_SolverBVH(solver, bvh, aabb, None)) - n_vfaces = solver.vfaces_info.vgeom_idx.shape[0] - if n_vfaces > 0: - mask = self._compute_visual_raycast_mask(solver) - if mask.any(): - aabb = AABB(n_batches=n_envs, n_aabbs=n_vfaces) - bvh = LBVH(aabb, max_n_query_result_per_aabb=0, n_radix_sort_groups=64) - self._shared_metadata.solver_bvhs.append(_SolverBVH(solver, bvh, aabb, mask)) + # Reuse the per-sim shared collision/visual BVH list so tactile-probe sensors in raycast mode see the same + # BVH as the raycaster and the per-frame rebuild is amortized across all consumers. 
+ self._shared_metadata.solver_bvhs = ensure_solver_bvhs(self._manager._sim) if not self._shared_metadata.solver_bvhs: gs.raise_exception( diff --git a/genesis/engine/sensors/tactile_shared.py b/genesis/engine/sensors/tactile_shared.py index 3c8866fa50..b16ef66df7 100644 --- a/genesis/engine/sensors/tactile_shared.py +++ b/genesis/engine/sensors/tactile_shared.py @@ -11,6 +11,7 @@ from genesis.utils.misc import concat_with_tensor, make_tensor_field if TYPE_CHECKING: + from genesis.engine.sensors.raycaster import _SolverBVH from genesis.utils.ring_buffer import TensorRingBuffer @@ -433,6 +434,30 @@ class ContactPrefilterMetadataMixin: sensor_n_contacts: torch.Tensor = make_tensor_field((0, 0), dtype_factory=lambda: gs.tc_int) +# ============================ Contact depth query mode (SDF vs raycast) ============================ + + +@dataclass +class ContactDepthQueryMetadataMixin: + """ + Shared per-sensor-class state for the contact-depth query backend. + + ``contact_depth_query`` is the single resolved mode for every sensor of this class -- ``"sdf"`` or ``"raycast"``, + defaulting to ``None`` until the first sensor's ``build()`` sets it. Sensors of the same class must agree (mismatch + raises in ``build``); different classes may independently choose modes. + + When mode is ``"raycast"``, ``collision_bvh`` holds a reference to the per-sim shared BVH list built lazily by + ``ensure_solver_bvhs`` (genesis/engine/sensors/raycaster.py). ``sensor_candidate_geom_mask`` is a per-(env, sensor, + geom) bool gate, scattered each step from the contact prefilter list (KinematicTactile family) or once at build from + ``track_geom_idx`` (ElastomerTaxel); BVH leaves whose ``faces_info.geom_idx`` falls outside the mask are + skipped, preserving the SDF path's per-sensor geom-filtering semantics.
+ """ + + contact_depth_query: str | None = None + collision_bvh: "list[_SolverBVH] | None" = None + sensor_candidate_geom_mask: torch.Tensor = make_tensor_field((0, 0, 0), dtype_factory=lambda: gs.tc_bool) + + # ============================ ViscoelasticHysteresis ============================ diff --git a/genesis/options/sensors/options.py b/genesis/options/sensors/options.py index 9d21644114..5dd19118df 100644 --- a/genesis/options/sensors/options.py +++ b/genesis/options/sensors/options.py @@ -247,8 +247,16 @@ class ProbesWithNormalSensorOptionsMixin(ProbeSensorOptionsMixin[SensorT]): probe_local_normal: UnitVec3FType | UnitVec3FArrayType | UnitVec3FGridType = (0.0, 0.0, 1.0) + def _probe_local_normal_required(self) -> bool: + """Override in subclasses where ``probe_local_normal`` is only consumed by an opt-in mode (e.g. raycast + contact-depth queries). When ``False``, the per-probe shape validation in ``model_post_init`` is skipped -- + sensors that never read the normal don't surface confusing length errors for the default value.""" + return True + def model_post_init(self, context: Any) -> None: super().model_post_init(context) + if not self._probe_local_normal_required(): + return n_probes = int(np.prod(np.asarray(self.probe_local_pos).shape[:-1])) normals = np.asarray(self.probe_local_normal) if normals.ndim > 1 and normals.size // 3 != n_probes: diff --git a/genesis/options/sensors/tactile.py b/genesis/options/sensors/tactile.py index e335154bdb..0fd5aebcd5 100644 --- a/genesis/options/sensors/tactile.py +++ b/genesis/options/sensors/tactile.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Literal import numpy as np from pydantic import Field, StrictBool @@ -107,6 +107,12 @@ class TactileProbeSensorOptionsMixin(ProbeSensorOptionsMixin[SensorT]): is untouched. dead_taxel_value_range : (float, float), optional Uniform range for the dead value sampled per channel on reset. Default ``(0.0, 0.0)``. 
+ contact_depth_query : {"sdf", "raycast"} or None, optional + Per-probe contact-depth backend. ``"sdf"`` queries the per-geom analytic SDF grid (fast, exact for primitives, + requires SDF activation). ``"raycast"`` walks the rigid solver's per-frame collision-mesh BVH and combines a + sphere/triangle closest-point test with a ray/triangle test along ``-probe_normal`` (handles arbitrary meshes + uniformly, shares the BVH with ``RaycasterSensor``). ``None`` (default) defers the choice: the mode is shared + per sensor class (last non-``None`` value built wins) and resolves to ``"sdf"`` if no sensor sets it. """ debug_contact_color: UnitIntervalVec3Type = (1.0, 0.2, 0.0) @@ -115,6 +121,7 @@ class TactileProbeSensorOptionsMixin(ProbeSensorOptionsMixin[SensorT]): probe_gain_resample_range: PositiveVec2FType | None = None dead_taxel_probability: NonNegativeFloat = 0.0 dead_taxel_value_range: Vec2FType = (0.0, 0.0) + contact_depth_query: Literal["sdf", "raycast"] | None = None def model_post_init(self, context: Any) -> None: super().model_post_init(context) @@ -139,7 +146,10 @@ class PointCloudTactileSensorMixin(TactileProbeSensorOptionsMixin[SensorT]): track_link_idx : array-like[int] Global link indices whose mesh geometry is used to sample a point cloud from. n_sample_points: int | array-like[int] - Total FPS samples split across ``track_link_idx``, or one count per tracked link. + Total FPS samples split across ``track_link_idx``, or one count per tracked link. Per-variant + counts are not supported: when a tracked link belongs to a heterogeneous entity, the per-link + count is allocated to every variant on that link (so each parallel environment sees the full + count regardless of which variant is active). use_visual_mesh : bool Whether to use the visual mesh when sampling the point cloud.
debug_point_cloud_color : array-like[float, float, float, float] @@ -160,8 +170,13 @@ class ContactProbe( RigidSensorOptionsMixin["ContactProbeSensor"], SimpleSensorOptions["ContactProbeSensor"], TactileProbeSensorOptionsMixin["ContactProbeSensor"], + ProbesWithNormalSensorOptionsMixin["ContactProbeSensor"], ViscoelasticHysteresisOptionsMixin["ContactProbeSensor"], ): + def _probe_local_normal_required(self) -> bool: + # SDF mode ignores probe_normal entirely; raycast mode uses it for the per-leaf raycast subtest. + return self.contact_depth_query == "raycast" + """ Returns boolean contact per probe based on the contact depth threshold. @@ -197,12 +212,17 @@ class ContactDepthProbe( RigidSensorOptionsMixin["ContactDepthProbeSensor"], SimpleSensorOptions["ContactDepthProbeSensor"], TactileProbeSensorOptionsMixin["ContactDepthProbeSensor"], + ProbesWithNormalSensorOptionsMixin["ContactDepthProbeSensor"], ViscoelasticHysteresisOptionsMixin["ContactDepthProbeSensor"], ): """ Returns contact depth in meters per probe. """ + def _probe_local_normal_required(self) -> bool: + # SDF mode ignores probe_normal entirely; raycast mode uses it for the per-leaf raycast subtest. 
+ return self.contact_depth_query == "raycast" + class KinematicTaxel( RigidSensorOptionsMixin["KinematicTaxelSensor"], diff --git a/genesis/utils/raycast_qd.py b/genesis/utils/raycast_qd.py index 8eed2b27cc..376d8e9298 100644 --- a/genesis/utils/raycast_qd.py +++ b/genesis/utils/raycast_qd.py @@ -108,10 +108,7 @@ def bvh_ray_cast( if hit_result.w > 0.0 and hit_result.x < closest_distance and hit_result.x >= 0.0: closest_distance = hit_result.x hit_face = i_f - # Compute triangle normal - edge1 = v1 - v0 - edge2 = v2 - v0 - hit_normal = edge1.cross(edge2).normalized() + hit_normal = triangle_face_normal(v0, v1, v2) else: # Internal node # Push children onto stack if stack_idx < qd.static(STACK_SIZE - 2): @@ -226,6 +223,71 @@ def ray_aabb_intersection( return result +@qd.func +def closest_point_on_triangle( + point: qd.types.vector(3), + v0: qd.types.vector(3), + v1: qd.types.vector(3), + v2: qd.types.vector(3), +) -> qd.types.vector(3): + """ + Closest point on a triangle to a query point. Reference: Christer Ericson, Real-Time Collision Detection §5.1.5.
+ """ + ab = v1 - v0 + ac = v2 - v0 + ap = point - v0 + + d1 = ab.dot(ap) + d2 = ac.dot(ap) + + closest = v0 + if not (d1 <= 0.0 and d2 <= 0.0): + bp = point - v1 + d3 = ab.dot(bp) + d4 = ac.dot(bp) + + if d3 >= 0.0 and d4 <= d3: + closest = v1 + else: + cp = point - v2 + d5 = ab.dot(cp) + d6 = ac.dot(cp) + + if d6 >= 0.0 and d5 <= d6: + closest = v2 + else: + vc = d1 * d4 - d3 * d2 + if vc <= 0.0 and d1 >= 0.0 and d3 <= 0.0: + w = d1 / (d1 - d3) + closest = v0 + w * ab + else: + vb = d5 * d2 - d1 * d6 + if vb <= 0.0 and d2 >= 0.0 and d6 <= 0.0: + w = d2 / (d2 - d6) + closest = v0 + w * ac + else: + va = d3 * d6 - d5 * d4 + if va <= 0.0 and (d4 - d3) >= 0.0 and (d5 - d6) >= 0.0: + w = (d4 - d3) / ((d4 - d3) + (d5 - d6)) + closest = v1 + w * (v2 - v1) + else: + denom = 1.0 / (va + vb + vc) + v = vb * denom + w = vc * denom + closest = v0 + v * ab + w * ac + return closest + + +@qd.func +def triangle_face_normal( + v0: qd.types.vector(3), + v1: qd.types.vector(3), + v2: qd.types.vector(3), +) -> qd.types.vector(3): + """Outward unit normal of the triangle (v0, v1, v2) under right-hand winding.""" + return (v1 - v0).cross(v2 - v0).normalized() + + @qd.func def update_aabbs( free_verts_state: array_class.VertsState, @@ -362,9 +424,7 @@ def bvh_ray_cast_visual( if hit_result.w > 0.0 and hit_result.x < closest_distance and hit_result.x >= 0.0: closest_distance = hit_result.x hit_face = i_f - edge1 = v1 - v0 - edge2 = v2 - v0 - hit_normal = edge1.cross(edge2).normalized() + hit_normal = triangle_face_normal(v0, v1, v2) else: if stack_idx < qd.static(STACK_SIZE - 2): node_stack[stack_idx] = node.left diff --git a/tests/test_sensors.py b/tests/test_sensors.py index bf4edf26a2..c34eea7a3c 100644 --- a/tests/test_sensors.py +++ b/tests/test_sensors.py @@ -2796,6 +2796,16 @@ def test_tactile_sensors_heterogeneous_object(show_viewer, tol): ) scene.build(n_envs=2) + + # Per-variant sampling: each heterogeneous variant must receive the full n_sample_points budget so + # every 
parallel env sees the requested point count regardless of which variant is active there. + for pc_sensor, n_requested in ((proximity_taxel, 800), (elastomer_taxel, 800)): + meta = pc_sensor._shared_metadata + pc_start = int(meta.sensor_pc_start[pc_sensor._idx].item()) + pc_end = pc_start + int(meta.sensor_pc_n[pc_sensor._idx].item()) + per_env_active = meta.pc_active_envs_mask[pc_start:pc_end].sum(dim=0) + assert_equal(per_env_active, torch.full_like(per_env_active, n_requested)) + obj.set_pos( [ [0.0, 0.0, PAD_TOP_Z + OBJECT_Z_SIZE / 2 - PENETRATION], @@ -2821,6 +2831,84 @@ def test_tactile_sensors_heterogeneous_object(show_viewer, tol): assert surface_distance[0, 1] < surface_distance[1, 1] +@pytest.mark.required +def test_tactile_contact_depth_query_sdf_vs_raycast_parity(show_viewer): + """SDF and raycast contact-depth backends should agree on a face-on contact across all four probe sensors.""" + PAD_SIZE = (0.2, 0.2, 0.05) + PAD_TOP_Z = PAD_SIZE[2] + BALL_R = 0.04 + PROBE_R = 0.01 + CENTER_PROBE = (0.0, 0.0, PAD_SIZE[2] / 2) + + scene = gs.Scene( + sim_options=gs.options.SimOptions(gravity=(0.0, 0.0, 0.0)), + profiling_options=gs.options.ProfilingOptions(show_FPS=False), + show_viewer=show_viewer, + ) + pad = scene.add_entity(gs.morphs.Box(size=PAD_SIZE, pos=(0.0, 0.0, PAD_SIZE[2] / 2), fixed=True)) + ball = scene.add_entity(gs.morphs.Sphere(radius=BALL_R, pos=(0.0, 0.0, 0.4))) + + common = dict(entity_idx=pad.idx, probe_local_pos=(CENTER_PROBE,), probe_radius=PROBE_R) + depth_sdf = scene.add_sensor(gs.sensors.ContactDepthProbe(contact_depth_query="sdf", **common)) + depth_ray = scene.add_sensor(gs.sensors.ContactDepthProbe(contact_depth_query="raycast", **common)) + kin_sdf = scene.add_sensor( + gs.sensors.KinematicTaxel( + probe_local_normal=(0.0, 0.0, 1.0), + normal_stiffness=100.0, + normal_damping=0.0, + shear_scalar=0.0, + twist_scalar=0.0, + contact_depth_query="sdf", + **common, + ) + ) + kin_ray = scene.add_sensor( + gs.sensors.KinematicTaxel( + 
probe_local_normal=(0.0, 0.0, 1.0), + normal_stiffness=100.0, + normal_damping=0.0, + shear_scalar=0.0, + twist_scalar=0.0, + contact_depth_query="raycast", + **common, + ) + ) + elast_common = dict( + entity_idx=pad.idx, + probe_local_pos=(CENTER_PROBE,), + probe_local_normal=(0.0, 0.0, 1.0), + probe_radius=PROBE_R, + track_link_idx=(ball.base_link_idx,), + n_sample_points=200, + ) + elast_sdf = scene.add_sensor(gs.sensors.ElastomerTaxel(contact_depth_query="sdf", **elast_common)) + elast_ray = scene.add_sensor(gs.sensors.ElastomerTaxel(contact_depth_query="raycast", **elast_common)) + scene.build(n_envs=0) + + ball.set_pos((0.0, 0.0, PAD_TOP_Z + BALL_R - 0.005)) # 5mm penetration + scene.step() + + # ContactDepthProbe -- the probe-radius offset semantics mean the actual numeric values are similar in both modes. + sdf_d = depth_sdf.read_ground_truth() + ray_d = depth_ray.read_ground_truth() + assert (sdf_d > gs.EPS).all() and (ray_d > gs.EPS).all() + assert_allclose(sdf_d, ray_d, tol=0.1 * PROBE_R) + + # KinematicTaxel force: both modes report a force in the same direction with magnitude within mesh-discretization + # tolerance of each other. + sdf_f = kin_sdf.read_ground_truth().force.reshape(-1, 3) + ray_f = kin_ray.read_ground_truth().force.reshape(-1, 3) + assert torch.linalg.norm(sdf_f, dim=-1).item() > 0 + assert torch.linalg.norm(ray_f, dim=-1).item() > 0 + cos_sim = (sdf_f * ray_f).sum(dim=-1) / ( + torch.linalg.norm(sdf_f, dim=-1) * torch.linalg.norm(ray_f, dim=-1) + gs.EPS + ) + assert (cos_sim > 0.9).all(), f"force direction mismatch: cos_sim={cos_sim}" + + # ElastomerTaxel dilate displacement: face-on contact, identical on both modes when geom is a sphere primitive. 
+ assert_allclose(elast_sdf.read_ground_truth(), elast_ray.read_ground_truth(), tol=0.1 * PROBE_R) + + # ------------------------------------------------------------------------------------------ # ----------------------------------- Bulk read API ---------------------------------------- # ------------------------------------------------------------------------------------------ From 10e9993b4de74bd6d8d1e27a454df2769187d21a Mon Sep 17 00:00:00 2001 From: Trinity Chung Date: Thu, 28 May 2026 00:05:53 -0400 Subject: [PATCH 7/7] cleanup --- genesis/engine/sensors/kinematic_tactile.py | 46 +++++-------------- genesis/engine/sensors/point_cloud_tactile.py | 23 ++++------ genesis/engine/sensors/tactile_shared.py | 17 ++++--- tests/test_sensors.py | 43 ++++++++++++++--- 4 files changed, 67 insertions(+), 62 deletions(-) diff --git a/genesis/engine/sensors/kinematic_tactile.py b/genesis/engine/sensors/kinematic_tactile.py index 456573eeb7..dd26ea22bc 100644 --- a/genesis/engine/sensors/kinematic_tactile.py +++ b/genesis/engine/sensors/kinematic_tactile.py @@ -24,6 +24,8 @@ triangle_face_normal, ) +from .raycaster import ensure_solver_bvhs, update_solver_bvhs + from .base_sensor import RigidSensorMetadataMixin, RigidSensorMixin, SimpleSensor, SimpleSensorMetadata from .probe import ( ProbeSensorMetadataMixin, @@ -906,41 +908,17 @@ def _kernel_kinematic_taxel_bvh( output_measured[torque_start + j, i_b] = torque_m[j] -def _resolve_query_mode(shared_metadata) -> str: - """Resolve ``shared_metadata.contact_depth_query`` to ``"sdf"`` or ``"raycast"``. 
``None`` defaults to ``"sdf"`` - and is latched in so subsequent calls short-circuit.""" - mode = shared_metadata.contact_depth_query - if mode is None: - mode = "sdf" - shared_metadata.contact_depth_query = mode - return mode - - -def _ensure_candidate_geom_mask(shared_metadata, B: int, n_sensors: int, n_geoms: int) -> None: - """(Re)allocate ``sensor_candidate_geom_mask`` to ``(B, n_sensors, n_geoms)`` if its current shape doesn't match. - Sized lazily because the resolved mode may flip to ``"raycast"`` only after all sensors of this class have built.""" - target = (B, n_sensors, n_geoms) - current = tuple(shared_metadata.sensor_candidate_geom_mask.shape) - if current != target: - shared_metadata.sensor_candidate_geom_mask = torch.zeros(target, dtype=gs.tc_bool, device=gs.device) - - class KinematicTactileSensorMixin(ProbeSensorMixin[ProbesWithNormalSensorSharedMetadataT]): def build(self): super().build() - # SDF activation is the default fallback; raycast mode does not require it but the call is idempotent and - # cheap so we keep it unconditional to avoid needing a build-finalize hook. + # SDF activation is the default fallback; the idempotent call keeps it active even in mixed-mode scenes. self._shared_metadata.solver.collider.activate_sdf() - # Last-value-wins propagation of the contact_depth_query option onto the per-sensor-type shared metadata. If - # any sensor of this class opts into "raycast", ensure the per-sim shared collision BVH list exists so it's - # ready by the time _update_current_timestep_data runs. + # Last-value-wins propagation of contact_depth_query. Build the shared collision BVH lazily on raycast opt-in. 
mode = self._options.contact_depth_query if mode is not None: self._shared_metadata.contact_depth_query = mode if self._shared_metadata.contact_depth_query == "raycast": - from genesis.engine.sensors.raycaster import ensure_solver_bvhs - self._shared_metadata.collision_bvh = ensure_solver_bvhs(self._manager._sim) @@ -1003,7 +981,7 @@ def _update_current_timestep_data( shared_metadata.sensor_n_contacts, ) - if _resolve_query_mode(shared_metadata) == "sdf": + if (shared_metadata.contact_depth_query or "sdf") == "sdf": _kernel_contact_depth_probe( shared_metadata.probe_positions, shared_metadata.probe_sensor_idx, @@ -1024,10 +1002,10 @@ def _update_current_timestep_data( measured_cols_b, ) else: - from genesis.engine.sensors.raycaster import update_solver_bvhs - B, n_sensors = shared_metadata.sensor_n_contacts.shape - _ensure_candidate_geom_mask(shared_metadata, B, n_sensors, solver.n_geoms) + mask_shape = (B, n_sensors, solver.n_geoms) + if tuple(shared_metadata.sensor_candidate_geom_mask.shape) != mask_shape: + shared_metadata.sensor_candidate_geom_mask = torch.zeros(mask_shape, dtype=gs.tc_bool, device=gs.device) _kernel_build_sensor_candidate_geom_mask( shared_metadata.sensor_contacts_idx, shared_metadata.sensor_n_contacts, @@ -1444,7 +1422,7 @@ def _update_current_timestep_data( shared_metadata.sensor_n_contacts, ) - if _resolve_query_mode(shared_metadata) == "sdf": + if (shared_metadata.contact_depth_query or "sdf") == "sdf": _kernel_kinematic_taxel( shared_metadata.probe_positions, shared_metadata.probe_sensor_idx, @@ -1475,10 +1453,10 @@ def _update_current_timestep_data( measured_cols_b, ) else: - from genesis.engine.sensors.raycaster import update_solver_bvhs - B, n_sensors = shared_metadata.sensor_n_contacts.shape - _ensure_candidate_geom_mask(shared_metadata, B, n_sensors, solver.n_geoms) + mask_shape = (B, n_sensors, solver.n_geoms) + if tuple(shared_metadata.sensor_candidate_geom_mask.shape) != mask_shape: + shared_metadata.sensor_candidate_geom_mask = 
torch.zeros(mask_shape, dtype=gs.tc_bool, device=gs.device) _kernel_build_sensor_candidate_geom_mask( shared_metadata.sensor_contacts_idx, shared_metadata.sensor_n_contacts, diff --git a/genesis/engine/sensors/point_cloud_tactile.py b/genesis/engine/sensors/point_cloud_tactile.py index 455a533cd3..353c0ca565 100644 --- a/genesis/engine/sensors/point_cloud_tactile.py +++ b/genesis/engine/sensors/point_cloud_tactile.py @@ -29,6 +29,7 @@ func_noised_probe_radius, get_measured_bufs, ) +from .raycaster import ensure_solver_bvhs, update_solver_bvhs from .tactile_shared import ( BVH_LEAF_SIZE, BVH_STACK_SIZE, @@ -47,6 +48,11 @@ register_grid_fft_sensor, ) +# Conservative cap for global-BVH closest-point walks in raycast mode. Points farther than this from every candidate +# triangle map to depth = 0 (so the elastomer "out of contact" branch fires). Sized to cover realistic elastomer +# penetrations -- bumping it widens BVH traversal cost but doesn't change correctness for in-contact probes. +_ELASTOMER_RAYCAST_QUERY_DIST = 0.1 + if TYPE_CHECKING: from genesis.options.sensors import SensorOptions from genesis.utils.ring_buffer import TensorRingBuffer @@ -1774,17 +1780,14 @@ def build(self): if self._link is None: gs.raise_exception("ElastomerTaxel must be attached to a rigid link with collision geometry.") - # Last-value-wins propagation of contact_depth_query. SDF activation is skipped only when this sensor's own - # option is explicitly "raycast" -- with last-value-wins semantics this can leave SDF activated in mixed-mode - # configurations, but never broken (an unused activated SDF is harmless). + # Last-value-wins propagation of contact_depth_query. Skip activate_sdf only when this sensor explicitly opts + # into raycast -- with last-wins, mixed-mode scenes may end up with SDF activated unused (harmless). 
mode = self._options.contact_depth_query if mode is not None: self._shared_metadata.contact_depth_query = mode if mode != "raycast": solver.collider.activate_sdf() if self._shared_metadata.contact_depth_query == "raycast": - from genesis.engine.sensors.raycaster import ensure_solver_bvhs - self._shared_metadata.collision_bvh = ensure_solver_bvhs(self._manager._sim) elastomer_geom_start_row = self._shared_metadata.elastomer_geom_idx.shape[0] @@ -2032,21 +2035,15 @@ def _update_current_timestep_data( shared_metadata.probe_depth_buf, ) else: - from genesis.engine.sensors.raycaster import update_solver_bvhs - - shared_metadata.contact_depth_query = "raycast" # latch update_solver_bvhs(shared_metadata.collision_bvh) rigid_entry = next(e for e in shared_metadata.collision_bvh if e.raycast_mask is None) - # Conservative BVH walk cap. Far enough to cover any realistic elastomer penetration; probes outside - # this radius from every candidate triangle map to depth = 0. - max_query_dist = float(0.1) _kernel_elastomer_probe_depth_bvh( shared_metadata.probe_positions, shared_metadata.probe_sensor_idx, shared_metadata.probe_radii, shared_metadata.links_idx, shared_metadata.sensor_candidate_geom_mask, - max_query_dist, + _ELASTOMER_RAYCAST_QUERY_DIST, rigid_entry.bvh.nodes, rigid_entry.bvh.morton_codes, solver.links_state, @@ -2146,7 +2143,7 @@ def _update_current_timestep_data( shared_metadata.elastomer_contact_sdf_enter, shared_metadata.elastomer_contact_sdf_exit, _ELASTOMER_QUERY_AABB_MARGIN, - float(0.1), # max_query_dist for the global-BVH closest-point search + _ELASTOMER_RAYCAST_QUERY_DIST, rigid_entry.bvh.nodes, rigid_entry.bvh.morton_codes, solver.links_state, diff --git a/genesis/engine/sensors/tactile_shared.py b/genesis/engine/sensors/tactile_shared.py index b16ef66df7..00b9e0cf3e 100644 --- a/genesis/engine/sensors/tactile_shared.py +++ b/genesis/engine/sensors/tactile_shared.py @@ -442,15 +442,14 @@ class ContactDepthQueryMetadataMixin: """ Shared per-sensor-class 
state for the contact-depth query backend. - ``contact_depth_query`` is the single resolved mode for every sensor of this class -- ``"sdf"`` or ``"raycast"``, - defaulting to ``None`` until the first sensor's ``build()`` sets it. Sensors of the same class must agree (mismatch - raises in ``build``); different classes may independently choose modes. - - When mode is ``"raycast"``, ``collision_bvh`` holds a reference to the per-sim shared BVH list built lazily by - ``ensure_solver_bvhs`` (genesis/engine/sensors/raycaster.py). ``sensor_candidate_geom_mask`` is a per-(env, sensor) - bool gate scattered into per step from the contact prefilter list (KinematicTactile family) or once at build from - ``sensor_track_geom_idx`` (ElastomerTaxel); BVH leaves whose ``faces_info.geom_idx`` falls outside the mask are - skipped, preserving the SDF path's per-sensor geom-filtering semantics. + ``contact_depth_query`` is the resolved mode for every sensor of this class -- ``"sdf"`` or ``"raycast"``. Each + sensor's ``build()`` overwrites the field with its own option when non-``None`` (last-value-wins); ``None`` at + update time falls back to ``"sdf"``. + + When mode is ``"raycast"``, ``collision_bvh`` references the per-sim shared BVH list built by + ``ensure_solver_bvhs``. ``sensor_candidate_geom_mask`` is a per-(env, sensor, geom) bool gate -- scattered per + step from the contact prefilter (KinematicTactile family) or once at build from ``sensor_track_geom_idx`` + (ElastomerTaxel) -- so BVH leaves whose ``faces_info.geom_idx`` falls outside the mask are skipped. 
""" contact_depth_query: str | None = None diff --git a/tests/test_sensors.py b/tests/test_sensors.py index c34eea7a3c..5d13fc76c6 100644 --- a/tests/test_sensors.py +++ b/tests/test_sensors.py @@ -2845,12 +2845,33 @@ def test_tactile_contact_depth_query_sdf_vs_raycast_parity(show_viewer): profiling_options=gs.options.ProfilingOptions(show_FPS=False), show_viewer=show_viewer, ) - pad = scene.add_entity(gs.morphs.Box(size=PAD_SIZE, pos=(0.0, 0.0, PAD_SIZE[2] / 2), fixed=True)) - ball = scene.add_entity(gs.morphs.Sphere(radius=BALL_R, pos=(0.0, 0.0, 0.4))) + pad = scene.add_entity( + gs.morphs.Box( + size=PAD_SIZE, + pos=(0.0, 0.0, PAD_SIZE[2] / 2), + fixed=True, + ), + ) + ball = scene.add_entity( + gs.morphs.Sphere( + radius=BALL_R, + pos=(0.0, 0.0, 0.4), + ), + ) common = dict(entity_idx=pad.idx, probe_local_pos=(CENTER_PROBE,), probe_radius=PROBE_R) - depth_sdf = scene.add_sensor(gs.sensors.ContactDepthProbe(contact_depth_query="sdf", **common)) - depth_ray = scene.add_sensor(gs.sensors.ContactDepthProbe(contact_depth_query="raycast", **common)) + depth_sdf = scene.add_sensor( + gs.sensors.ContactDepthProbe( + contact_depth_query="sdf", + **common, + ), + ) + depth_ray = scene.add_sensor( + gs.sensors.ContactDepthProbe( + contact_depth_query="raycast", + **common, + ), + ) kin_sdf = scene.add_sensor( gs.sensors.KinematicTaxel( probe_local_normal=(0.0, 0.0, 1.0), @@ -2881,8 +2902,18 @@ def test_tactile_contact_depth_query_sdf_vs_raycast_parity(show_viewer): track_link_idx=(ball.base_link_idx,), n_sample_points=200, ) - elast_sdf = scene.add_sensor(gs.sensors.ElastomerTaxel(contact_depth_query="sdf", **elast_common)) - elast_ray = scene.add_sensor(gs.sensors.ElastomerTaxel(contact_depth_query="raycast", **elast_common)) + elast_sdf = scene.add_sensor( + gs.sensors.ElastomerTaxel( + contact_depth_query="sdf", + **elast_common, + ), + ) + elast_ray = scene.add_sensor( + gs.sensors.ElastomerTaxel( + contact_depth_query="raycast", + **elast_common, + ), + ) 
scene.build(n_envs=0) ball.set_pos((0.0, 0.0, PAD_TOP_Z + BALL_R - 0.005)) # 5mm penetration