pytorch · vmoens · Mar 13, 2023 · Mar 20, 2023 · Mar 20, 2023 · Mar 20, 2023
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -375,7 +375,7 @@ jobs:
       image: ubuntu-2004-cuda-11.4:202110-01
     resource_class: gpu.nvidia.medium
     environment:
-      image_name: "pytorch/manylinux-cuda117"
+      image_name: "nvidia/cudagl:11.4.0-base"  # CUDA + OpenGL base image; presumably Debian/Ubuntu-based, since the job scripts install packages with apt-get
       TAR_OPTIONS: --no-same-owner
       PYTHON_VERSION: << parameters.python_version >>
       CU_VERSION: << parameters.cu_version >>

diff --git a/.circleci/unittest/linux_examples/scripts/install.sh b/.circleci/unittest/linux_examples/scripts/install.sh
@@ -4,6 +4,8 @@ unset PYTORCH_VERSION
 # For unittest, nightly PyTorch is used as the following section,
 # so no need to set PYTORCH_VERSION.
 # In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config.
 
 set -e
+# Build prerequisites; installed after `set -e` so that a failed install stops the script.
+apt-get update && apt-get install -y git wget gcc g++
 

diff --git a/.circleci/unittest/linux_examples/scripts/run_test.sh b/.circleci/unittest/linux_examples/scripts/run_test.sh
@@ -8,6 +8,8 @@
 
 set -e
 
+# System packages for the example runs: the stock swig is removed and swig3.0 installed instead, next to build tools and the Mesa/OSMesa/GLFW/GLUT libraries.
+apt-get update && apt-get remove swig -y && apt-get install -y git gcc patchelf libosmesa6-dev libgl1-mesa-glx libglfw3 swig3.0 wget freeglut3 freeglut3-dev
 eval "$(./conda/bin/conda shell.bash hook)"
 conda activate ./env
 
@@ -27,19 +29,75 @@ export MKL_THREADING_LAYER=GNU
 python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 20
 python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 20
 
+# ========================================================================================
+# DDPG
+# ----
+#
+# Modalities:
+# ^^^^^^^^^^^
+#
+# pixels on/off:  env.from_pixels=True / env.from_pixels=False
+# batched on/off: env.num_envs=2 / env.num_envs=1
+# Each batching mode is run twice: first without pixels, then with pixels.
 # With batched environments
 python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
-  total_frames=48 \
-  init_random_frames=10 \
-  batch_size=10 \
-  frames_per_batch=16 \
-  num_workers=4 \
-  env_per_collector=2 \
-  collector_devices=cuda:0 \
-  optim_steps_per_batch=1 \
-  record_video=True \
-  record_frames=4 \
-  buffer_size=120
+  collector.total_frames=48 \
+  collector.init_random_frames=10 \
+  collector.frames_per_batch=16 \
+  collector.num_collectors=4 \
+  collector.collector_devices=cuda:0 \
+  env.num_envs=2 \
+  optim.batch_size=10 \
+  optim.optim_steps_per_batch=1 \
+  recorder.video=True \
+  recorder.frames=4 \
+  replay_buffer.capacity=120 \
+  env.from_pixels=False \
+  logger.backend=csv
+python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
+  collector.total_frames=48 \
+  collector.init_random_frames=10 \
+  collector.frames_per_batch=16 \
+  collector.num_collectors=4 \
+  collector.collector_devices=cuda:0 \
+  env.num_envs=2 \
+  optim.batch_size=10 \
+  optim.optim_steps_per_batch=1 \
+  recorder.video=True \
+  recorder.frames=4 \
+  replay_buffer.capacity=120 \
+  env.from_pixels=True \
+  logger.backend=csv
+# With single envs (env.num_envs=1): state-based run first, then the run from pixels
+python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
+  collector.total_frames=48 \
+  collector.init_random_frames=10 \
+  collector.frames_per_batch=16 \
+  collector.num_collectors=4 \
+  collector.collector_devices=cuda:0 \
+  env.num_envs=1 \
+  optim.batch_size=10 \
+  optim.optim_steps_per_batch=1 \
+  recorder.video=True \
+  recorder.frames=4 \
+  replay_buffer.capacity=120 \
+  env.from_pixels=False \
+  logger.backend=csv
+python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
+  collector.total_frames=48 \
+  collector.init_random_frames=10 \
+  collector.frames_per_batch=16 \
+  collector.num_collectors=4 \
+  collector.collector_devices=cuda:0 \
+  env.num_envs=1 \
+  optim.batch_size=10 \
+  optim.optim_steps_per_batch=1 \
+  recorder.video=True \
+  recorder.frames=4 \
+  replay_buffer.capacity=120 \
+  env.from_pixels=True \
+  logger.backend=csv
+
 python .circleci/unittest/helpers/coverage_run_parallel.py examples/a2c/a2c.py \
   total_frames=48 \
   batch_size=10 \
@@ -112,19 +170,6 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/dreamer/drea
   buffer_size=120 \
   rssm_hidden_dim=17
 
-# With single envs
-python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
-  total_frames=48 \
-  init_random_frames=10 \
-  batch_size=10 \
-  frames_per_batch=16 \
-  num_workers=2 \
-  env_per_collector=1 \
-  collector_devices=cuda:0 \
-  optim_steps_per_batch=1 \
-  record_video=True \
-  record_frames=4 \
-  buffer_size=120
 python .circleci/unittest/helpers/coverage_run_parallel.py examples/a2c/a2c.py \
   total_frames=48 \
   batch_size=10 \

diff --git a/.circleci/unittest/linux_examples/scripts/setup_env.sh b/.circleci/unittest/linux_examples/scripts/setup_env.sh
@@ -9,6 +9,8 @@ set -e
 
 this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+# git is needed by the commands right below; wget and the C/C++ compilers are installed alongside it.
+apt-get update && apt-get install -y git wget gcc g++
 # Avoid error: "fatal: unsafe repository"
 git config --global --add safe.directory '*'
 root_dir="$(git rev-parse --show-toplevel)"
 conda_dir="${root_dir}/conda"
@@ -71,48 +73,12 @@ conda env config vars set MUJOCO_PY_MUJOCO_PATH=$root_dir/.mujoco/mujoco210 \
   MUJOCO_GL=$PRIVATE_MUJOCO_GL \
   PYOPENGL_PLATFORM=$PRIVATE_MUJOCO_GL
 
-# Software rendering requires GLX and OSMesa.
-if [ $PRIVATE_MUJOCO_GL == 'egl' ] || [ $PRIVATE_MUJOCO_GL == 'osmesa' ] ; then
-  yum makecache
-  yum install -y glfw
-  yum install -y glew
-  yum install -y mesa-libGL
-  yum install -y mesa-libGL-devel
-  yum install -y mesa-libOSMesa-devel
-  yum -y install egl-utils
-  yum -y install freeglut
-fi
-
 pip install pip --upgrade
 
 conda env update --file "${this_dir}/environment.yml" --prune
 
 conda deactivate
 conda activate "${env_dir}"
 
-if [[ $OSTYPE != 'darwin'* ]]; then
-  # install ale-py: manylinux names are broken for CentOS so we need to manually download and
-  # rename them
-  PY_VERSION=$(python --version)
-  if [[ $PY_VERSION == *"3.7"* ]]; then
-    wget https://files.pythonhosted.org/packages/ab/fd/6615982d9460df7f476cad265af1378057eee9daaa8e0026de4cedbaffbd/ale_py-0.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    pip install ale_py-0.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    rm ale_py-0.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-  elif [[ $PY_VERSION == *"3.8"* ]]; then
-    wget https://files.pythonhosted.org/packages/0f/8a/feed20571a697588bc4bfef05d6a487429c84f31406a52f8af295a0346a2/ale_py-0.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    pip install ale_py-0.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    rm ale_py-0.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-  elif [[ $PY_VERSION == *"3.9"* ]]; then
-    wget https://files.pythonhosted.org/packages/a0/98/4316c1cedd9934f9a91b6e27a9be126043b4445594b40cfa391c8de2e5e8/ale_py-0.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    pip install ale_py-0.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    rm ale_py-0.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-  elif [[ $PY_VERSION == *"3.10"* ]]; then
-    wget https://files.pythonhosted.org/packages/60/1b/3adde7f44f79fcc50d0a00a0643255e48024c4c3977359747d149dc43500/ale_py-0.8.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
-    mv ale_py-0.8.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl ale_py-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    pip install ale_py-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    rm ale_py-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-  fi
-  pip install "gymnasium[atari,accept-rom-license]"
-else
-  pip install "gymnasium[atari,accept-rom-license]"
-fi
+pip install ale-py  # NOTE(review): unpinned (the manual wheel install this replaces used ale_py 0.8.0) - confirm newer releases are fine
+pip install "gymnasium[atari,accept-rom-license]"
diff --git a/examples/ddpg/config.yaml b/examples/ddpg/config.yaml
@@ -1,36 +1,65 @@
-env_name: HalfCheetah-v4
-env_task: ""
-env_library: gym
-async_collection: 1
-record_video: 0
-normalize_rewards_online: 1
-normalize_rewards_online_scale: 5
-frame_skip: 1
-frames_per_batch: 1024
-optim_steps_per_batch: 128
-batch_size: 256
-total_frames: 1000000
-prb: 1
-lr: 3e-4
-ou_exploration: 1
-multi_step: 1
-init_random_frames: 25000
-activation: elu
-gSDE: 0
-from_pixels: 0
-#collector_devices: [cuda:1,cuda:1,cuda:1,cuda:1]
-collector_devices: [cpu,cpu,cpu,cpu]
-env_per_collector: 8
-num_workers: 32
-lr_scheduler: ""
-value_network_update_interval: 200
-record_interval: 10
-max_frames_per_traj: -1
-weight_decay: 0.0
-annealing_frames: 1000000
-init_env_steps: 10000
-record_frames: 10000
-loss_function: smooth_l1
-batch_transform: 1
-buffer_prefetch: 64
-norm_stats: 1
+# task and env
+env:
+  env_name: HalfCheetah-v4
+  env_task: ""
+  env_library: gym
+  normalize_rewards_online: 1
+  normalize_rewards_online_scale: 5
+  frame_skip: 1
+  norm_stats: 1
+  num_envs: 4
+  n_samples_stats: 1000
+  noop: 1
+  reward_scaling: null  # explicitly unset (was a bare empty value)
+  from_pixels: false
+
+# collector
+collector:
+  async_collection: 1
+  frames_per_batch: 1024
+  total_frames: 1000000
+  multi_step: 3  # 0 to disable
+  init_random_frames: 25000
+  collector_devices: cpu  # [cpu,cpu,cpu,cpu]
+  num_collectors: 4
+  max_frames_per_traj: -1
+
+# eval
+recorder:
+  video: true
+  interval: 10000  # record interval in frames
+  frames: 10000
+
+# logger
+logger:
+  backend: wandb  # the CI example runs override this with logger.backend=csv
+  exp_name: ddpg_cheetah_gym
+
+# replay buffer
+replay_buffer:
+  prb: 1
+  buffer_prefetch: 64
+  capacity: 1000000
+
+# optimization
+optim:
+  device: cpu
+  lr: 3.0e-4  # written with a decimal point so that YAML 1.1 loaders read a float, not a string
+  weight_decay: 0.0
+  batch_size: 256
+  lr_scheduler: ""
+  value_network_update_interval: 200
+  optim_steps_per_batch: 8
+
+# policy and model
+model:
+  ou_exploration: 1
+  annealing_frames: 1000000
+  noisy: false
+  activation: elu
+
+# loss
+loss:
+  loss_function: smooth_l1
+  gamma: 0.99
+  tau: 0.05