I am trying to get gradients with respect to the control inputs, but the gradient graph seems to be broken. I can get gradients with respect to the initial pose (as in the unit test), but not with respect to the control.
import time
import numpy as np
import torch
import genesis as gs
from etils import epath
from torchviz import make_dot
# Start Genesis on the CUDA backend; only warnings and above are logged.
gs.init(backend=gs.cuda, logging_level="warning")

# Script-level settings. `dt` and `substeps` are passed to the simulator
# options below; `horizon`, `num_iter` and `lr` are module-level values for
# the rollout length, the number of optimisation iterations and the learning
# rate.
substeps = 1
dt = 1e-2
horizon = 10
num_iter = 400
lr = 5e-2

# Simulator options.
# NOTE(review): requires_grad=True is presumably what makes scene.step()
# differentiable -- confirm against the Genesis documentation.
sim_options = gs.options.SimOptions(
    dt=dt,
    substeps=substeps,
    gravity=(0.0, 0.0, -9.81),
    requires_grad=True,
)

# Rigid-solver options: collisions, self-collisions, joint limits, contact
# islands and hibernation are all switched off; constraints stay enabled
# (disable_constraint=False).
rigid_options = gs.options.RigidOptions(
    enable_collision=False,
    enable_self_collision=False,
    enable_joint_limit=False,
    disable_constraint=False,
    use_contact_island=False,
    use_hibernation=False,
)

# Interactive viewer camera and window settings.
viewer_options = gs.options.ViewerOptions(
    camera_pos=(0.0, -4.0, 1.5),
    camera_lookat=(0.0, 0.0, 1.0),
    camera_fov=30.0,
    res=(960, 640),
    max_FPS=240,
)

scene = gs.Scene(
    sim_options=sim_options,
    rigid_options=rigid_options,
    viewer_options=viewer_options,
    show_viewer=True,
)
# Load the cart-pole model shipped as a resource of `genesis_playground`.
mjcf_path = str(epath.resource_path('genesis_playground') / "envs/xmls/cartpole.xml")
cartpole = scene.add_entity(
    gs.morphs.MJCF(file=mjcf_path),
)

# Joints: slider (cart x), hinge_1 (pole angle)
slider_joint = cartpole.get_joint("slider")
hinge_joint = cartpole.get_joint("hinge_1")

# Local q indices (for generalized coordinates). These are the right indices
# for anything addressed by generalized position, e.g. slicing get_qpos().
slider_q_idx = slider_joint.q_idx_local
hinge_q_idx = hinge_joint.q_idx_local

scene.build()

# The set_dofs_* setters are DOF-indexed, so address them with DOF indices
# rather than q indices.
# NOTE(review): for this model each joint presumably has one q and one DOF, so
# both index sets coincide -- confirm against the MJCF.
slider_dof_idx = slider_joint.dof_idx_local
hinge_dof_idx = hinge_joint.dof_idx_local
dofs_idx_local = [slider_dof_idx, hinge_dof_idx]

# Zero armature on both DOFs, and the same force range of [-10, 10] on each.
cartpole.set_dofs_armature([0.0, 0.0], dofs_idx_local)
cartpole.set_dofs_force_range(
    lower=np.array([-10.0, -10.0]),
    upper=np.array([10.0, 10.0]),
    dofs_idx_local=dofs_idx_local,
)
# Learnable initial slider position and hinge angle.
# `q` is detached from `q_init` and then re-flagged with requires_grad_(), so
# it is its own leaf tensor that the optimizer can update.
q_init = gs.tensor([1.0, 0.5], requires_grad=True)
q = q_init.detach()
q.requires_grad_()

# Learnable "control": applied below as the slider velocity set at the start
# of every rollout.
control = gs.tensor([3.0], requires_grad=True)

# set_dofs_velocity is DOF-indexed, so address the slider by its DOF index
# rather than its q index.
# NOTE(review): for a single-DOF joint both indices presumably coincide --
# confirm against the MJCF.
control_dof_idx = slider_joint.dof_idx_local

optimizer = torch.optim.Adam([control, q], lr=lr)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=num_iter, eta_min=1e-3
)

# Optimization loop
for it in range(num_iter):
    # NOTE(review): the author left scene.reset() disabled; presumably any
    # state not overwritten below carries over between iterations -- confirm.
    # scene.reset()
    cartpole.set_qpos(q)
    # NOTE(review): the reported runs print "ctrl grad=None", i.e. no gradient
    # reaches `control` through this call. Whether set_dofs_velocity is
    # differentiable in this Genesis version still needs confirming.
    cartpole.set_dofs_velocity(control, control_dof_idx)

    # Rollout
    for _ in range(horizon):
        scene.step()

    # Final state (indexed by q index, since this is a generalized position).
    q_final = cartpole.get_qpos()
    x_T = q_final[slider_q_idx]
    theta_T = q_final[hinge_q_idx]

    # Loss: drive both the final pole angle and the final cart position to
    # zero (theta_T -> 0, x_T -> 0).
    loss = theta_T**2 + x_T**2

    optimizer.zero_grad()

    # Dump the autograd graph of the loss to "autograd_graph.png" for
    # inspection. This runs on every iteration and overwrites the same file.
    make_dot(
        loss,
        params={
            "q": q,
            "control": control,
            "q_final": q_final,
            "x_T": x_T,
            "theta_T": theta_T,
            "loss": loss,
        },
        show_attrs=True,
        show_saved=True,
    ).render("autograd_graph", format="png")

    # retain_graph=True so Genesis' internal graph is not freed
    loss.backward(retain_graph=True)
    optimizer.step()
    scheduler.step()

    # A grad of None here means backward() never populated that tensor's
    # .grad on this iteration.
    print(
        f"[iter {it:03d}] "
        f"grad={q.grad}, "
        f"ctrl grad={control.grad}, "
    )
[iter 000] grad=tensor([2.0284, 1.0394], device='cuda:0'), ctrl grad=None,
[iter 001] grad=tensor([1.9287, 0.9310], device='cuda:0'), ctrl grad=None,
[iter 002] grad=tensor([1.8292, 0.8229], device='cuda:0'), ctrl grad=None,
[iter 003] grad=tensor([1.7300, 0.7156], device='cuda:0'), ctrl grad=None,
[iter 004] grad=tensor([1.6313, 0.6097], device='cuda:0'), ctrl grad=None,
Bug Description
I am trying to get gradients with respect to the control inputs, but the gradient graph seems to be broken. I can get gradients with respect to the initial pose (as in the unit test), but not with respect to the control.
Steps to Reproduce
Expected Behavior
The gradient with respect to the control input should not be `None`.
Screenshots/Videos
Relevant log output
Environment
Release version or Commit ID
a2787f5
Additional Context
No response